From 6edc7bba6ab192595ec860c3b5034e6bed92110a Mon Sep 17 00:00:00 2001
From: zqw_1997 <118182234+zhengqiwen1997@users.noreply.github.com>
Date: Wed, 1 Feb 2023 21:38:27 +0800
Subject: [PATCH] remove fluid.initializer.UniformInitializer, ConstantInitializer, NormalInitializer, TruncatedNormalInitializer, XavierInitializer, BilinearInitializer, MSRAInitializer, NumpyArrayInitializer and calculate_gain.. (#49498)

* move UniformInitializer and ConstantInitializer
* more modify
* circular import resolved
* another circular import resolved?
* more circular import 2
* circular import 3
* change import paddle in metric.py
* BuildStrategy import from fluid
* modify the framework import path in common.py
* change rnn.py import, from static to original framework
* change import static in the nn folder
* default_main_program should import from common_ops_import
* add import paddle in param_attr.py
* use core not paddle module for using VarDesc
* another old uniform
* mistake that used Uniform instead of UniformInitializer
* modify UniformInitializer doc
* move fluid.NormalInitializer to nn.initializer.NormalInitializer
* remove import of Normal in fluid.layers.nn.py
* remove more import of old Normal
* remove more import of old Normal
* sample code modify and tests modify import
* is_listen_failed passing arg should be log file
* problem solved
* a mistake solved
* comments resolved and remove paddle.fluid.initializer.TruncatedNormalInitializer
* remove paddle.fluid.initializer.XavierInitializer and paddle.fluid.initializer.MSRAInitializer
* remove paddle.fluid.initializer.BilinearInitializer, NumpyArrayInitializer and set_global_initializer
* change fluid to static
* change static to fluid to avoid circular import in distributed_strategy.py
* fix example code and test_initializer
* ValueType
* sample code fix
* change set_global_initializer back to fluid
* put paddle.static.BuildStrategy.ReduceStrategy into the function to avoid circular import
* remove calculate_gain, delete BilinearInitializer and revert set_global_initializer
* use UniformInitializer, ConstantInitializer, NormalInitializer, TruncatedNormalInitializer, XavierInitializer, MSRAInitializer, NumpyArrayInitializer in as few places as possible
* fix argument incompatibility
* fix more argument incompatibilities
* fix test_prelu_op_xpu.py Constant
* fix inaccurate doc
* more doc fix: default value
---
 python/paddle/common_ops_import.py | 1 -
 .../fleet/base/distributed_strategy.py | 3 +-
 .../distributed/fleet/layers/mpu/random.py | 2 +-
 .../fleet/meta_optimizers/dgc_optimizer.py | 4 +-
 .../distributed/fleet/metrics/metric.py | 2 +-
 python/paddle/fluid/compiler.py | 6 +-
 .../paddle/fluid/contrib/layers/metric_op.py | 6 +-
 python/paddle/fluid/contrib/layers/nn.py | 32 +-
 python/paddle/fluid/evaluator.py | 1 -
 .../incubate/fleet/tests/fleet_deep_ctr.py | 6 +-
 python/paddle/fluid/initializer.py | 1218 +----------------
 python/paddle/fluid/install_check.py | 5 +-
 python/paddle/fluid/layer_helper.py | 7 +-
 python/paddle/fluid/layers/io.py | 2 +-
 python/paddle/fluid/layers/nn.py | 8 +-
 python/paddle/fluid/metrics.py | 1 -
 python/paddle/fluid/optimizer.py | 20 +-
 python/paddle/fluid/param_attr.py | 13 +-
 .../unittests/auto_parallel_autoconvert.py | 9 +-
 .../unittests/auto_parallel_save_load.py | 5 +-
 .../collective/column_parallel_linear_api.py | 8 +-
 .../fleet/parallel_dygraph_transformer.py | 10 +-
 .../collective/fleet/pipeline_mnist.py | 8 +-
 .../fleet/pipeline_mnist_multi_device.py | 8 +-
 .../fleet/pipeline_mnist_one_device.py | 6 +-
.../fleet/static_model_parallel_by_col.py | 6 +- .../fleet/static_model_parallel_by_row.py | 8 +- .../fleet/static_model_parallel_embedding.py | 6 +- .../multinode/dygraph_hybrid_dpppmp.py | 2 +- .../multinode/dygraph_hybrid_fp16.py | 2 +- .../multinode/dygraph_hybrid_recompute.py | 2 +- .../collective/parallel_embedding_api.py | 4 +- .../collective/row_parallel_linear_api.py | 4 +- .../tests/unittests/dist_allreduce_op.py | 6 +- .../paddle/fluid/tests/unittests/dist_ctr.py | 6 +- .../fluid/tests/unittests/dist_fleet_ctr.py | 6 +- .../dist_fleet_heter_pipeline_ctr.py | 6 +- .../dist_fleet_raw_program_optimizer.py | 6 +- ...et_raw_program_optimizer_fuse_allreduce.py | 6 +- .../tests/unittests/dist_fleet_simnet_bow.py | 16 +- .../dist_fleet_sparse_embedding_ctr.py | 10 +- .../fluid/tests/unittests/dist_mnist.py | 6 +- .../fluid/tests/unittests/dist_se_resnext.py | 8 +- .../unittests/dist_text_classification.py | 8 +- .../fluid/tests/unittests/dist_transformer.py | 16 +- .../fluid/tests/unittests/dist_word2vec.py | 12 +- .../test_auto_parallel_gradient_merge_pass.py | 9 +- .../dygraph_to_static/bert_dygraph_model.py | 10 +- .../unittests/dygraph_to_static/darknet.py | 6 +- .../seq2seq_dygraph_model.py | 6 +- .../dygraph_to_static/simnet_dygraph_model.py | 5 +- .../simnet_dygraph_model_v2.py | 2 +- .../test_basic_api_transformation.py | 14 +- .../unittests/dygraph_to_static/test_bmn.py | 4 +- .../dygraph_to_static/test_convert_call.py | 4 +- .../dygraph_to_static/test_cycle_gan.py | 20 +- .../unittests/dygraph_to_static/test_lac.py | 12 +- .../dygraph_to_static/test_mobile_net.py | 7 +- .../dygraph_to_static/test_ptb_lm.py | 14 +- .../dygraph_to_static/test_resnet.py | 2 +- .../dygraph_to_static/test_se_resnet.py | 6 +- .../dygraph_to_static/test_word2vec.py | 4 +- .../transformer_dygraph_model.py | 10 +- .../unittests/dygraph_to_static/yolov3.py | 4 +- .../unittests/fleet_heter_ps_training.py | 6 +- .../unittests/ir/inference/program_config.py | 3 +- .../test_mkldnn_conv_bias_fuse_pass.py | 12 +- .../ir/inference/test_trt_subgraph_pass.py | 4 +- .../mlu/test_batch_norm_op_mlu_v2.py | 6 +- .../tests/unittests/npu/test_adam_op_npu.py | 4 +- .../unittests/npu/test_run_program_op_npu.py | 4 +- .../parallel_dygraph_sparse_embedding.py | 6 +- .../fluid/tests/unittests/simple_nets.py | 4 +- .../static_model_parallel_fused_attention.py | 6 +- ...static_model_parallel_fused_feedforward.py | 6 +- ..._model_parallel_fused_multi_transformer.py | 6 +- .../fluid/tests/unittests/test_adam_op.py | 6 +- .../unittests/test_auto_parallel_mapper.py | 17 +- .../test_avoid_twice_initialization.py | 5 +- .../fluid/tests/unittests/test_base_layer.py | 2 +- .../tests/unittests/test_batch_norm_op_v2.py | 6 +- .../tests/unittests/test_calc_gradient.py | 4 +- .../tests/unittests/test_communicator_geo.py | 2 +- .../tests/unittests/test_conv2d_layer.py | 5 +- .../unittests/test_conv2d_transpose_layer.py | 5 +- .../tests/unittests/test_conv3d_layer.py | 5 +- .../unittests/test_conv3d_transpose_layer.py | 5 +- .../tests/unittests/test_cuda_random_seed.py | 8 +- .../unittests/test_decoupled_py_reader.py | 2 +- .../fluid/tests/unittests/test_desc_clone.py | 4 +- .../fluid/tests/unittests/test_detach.py | 4 +- ..._dist_fleet_a_sync_optimizer_auto_async.py | 2 +- .../test_dist_fleet_heter_program.py | 18 +- .../unittests/test_dist_fleet_minimize.py | 12 +- .../tests/unittests/test_dist_fleet_ps.py | 12 +- .../tests/unittests/test_dist_fleet_ps11.py | 12 +- .../tests/unittests/test_dist_fleet_ps12.py | 12 +- 
.../tests/unittests/test_dist_fleet_ps13.py | 12 +- .../tests/unittests/test_dist_fleet_ps2.py | 12 +- .../tests/unittests/test_dist_fleet_ps3.py | 12 +- .../tests/unittests/test_dist_fleet_ps4.py | 12 +- .../tests/unittests/test_dist_fleet_ps5.py | 12 +- .../tests/unittests/test_dist_fleet_ps6.py | 12 +- .../test_dist_fleet_sparse_embedding_ctr.py | 6 +- .../tests/unittests/test_dist_fleet_spmt.py | 12 +- .../unittests/test_dist_sparse_load_ps0.py | 8 +- .../fluid/tests/unittests/test_dist_train.py | 12 +- .../tests/unittests/test_dist_transpiler.py | 20 +- .../test_eager_deletion_delete_vars.py | 2 +- .../test_eager_deletion_padding_rnn.py | 14 +- .../test_eager_deletion_recurrent_op.py | 4 +- .../tests/unittests/test_egr_python_api.py | 6 +- .../tests/unittests/test_functional_conv2d.py | 9 +- .../test_functional_conv2d_transpose.py | 9 +- .../tests/unittests/test_functional_conv3d.py | 9 +- .../test_functional_conv3d_transpose.py | 9 +- .../tests/unittests/test_fuse_bn_act_pass.py | 4 +- .../unittests/test_fuse_bn_add_act_pass.py | 14 +- .../test_fused_multi_transformer_op.py | 10 +- .../unittests/test_generator_dataloader.py | 2 +- .../fluid/tests/unittests/test_hsigmoid_op.py | 15 +- .../tests/unittests/test_imperative_deepcf.py | 2 +- ..._imperative_lod_tensor_to_selected_rows.py | 4 +- .../test_imperative_ocr_attention_model.py | 9 +- .../unittests/test_imperative_ptb_rnn.py | 14 +- .../tests/unittests/test_imperative_resnet.py | 2 +- .../unittests/test_imperative_save_load_v2.py | 14 +- ..._imperative_selected_rows_to_lod_tensor.py | 6 +- ..._imperative_transformer_sorted_gradient.py | 10 +- .../fluid/tests/unittests/test_initializer.py | 73 +- .../tests/unittests/test_ir_inplace_pass.py | 2 +- .../fluid/tests/unittests/test_layers.py | 66 +- .../fluid/tests/unittests/test_linear.py | 4 +- .../unittests/test_lookup_table_bf16_op.py | 2 +- .../unittests/test_lookup_table_v2_bf16_op.py | 2 +- .../unittests/test_lookup_table_v2_op.py | 4 +- ...cess_dataloader_iterable_dataset_static.py | 4 +- .../test_multiprocess_dataloader_static.py | 4 +- .../paddle/fluid/tests/unittests/test_nce.py | 5 +- .../test_nn_functional_embedding_static.py | 4 +- .../tests/unittests/test_optimizer_grad.py | 6 +- .../test_optimizer_in_control_flow.py | 8 +- .../unittests/test_parallel_executor_mnist.py | 4 +- .../fluid/tests/unittests/test_parameter.py | 3 +- .../fluid/tests/unittests/test_prelu_op.py | 4 +- .../unittests/test_program_prune_backward.py | 2 +- .../fluid/tests/unittests/test_prune.py | 6 +- .../fluid/tests/unittests/test_py_func_op.py | 2 +- .../fluid/tests/unittests/test_random_seed.py | 8 +- .../tests/unittests/test_recurrent_op.py | 8 +- .../fluid/tests/unittests/test_row_conv_op.py | 2 +- .../tests/unittests/test_run_program_op.py | 6 +- .../tests/unittests/test_set_bool_attr.py | 4 +- .../fluid/tests/unittests/test_sgd_op_bf16.py | 2 +- .../tests/unittests/test_static_save_load.py | 14 +- .../tests/unittests/test_tdm_child_op.py | 4 +- .../tests/unittests/test_tdm_sampler_op.py | 6 +- .../tests/unittests/test_uniform_random_op.py | 2 +- .../unittests/test_weight_normalization.py | 3 +- .../tests/unittests/transformer_model.py | 25 +- .../unittests/xpu/test_batch_norm_op_xpu.py | 2 +- .../test_fused_resnet_basic_block_op_xpu.py | 36 +- .../tests/unittests/xpu/test_prelu_op_xpu.py | 2 +- .../fluid/transpiler/distribute_transpiler.py | 4 +- python/paddle/incubate/asp/asp.py | 5 +- python/paddle/nn/decode.py | 2 +- python/paddle/nn/functional/common.py | 2 +- 
python/paddle/nn/functional/conv.py | 2 +- python/paddle/nn/functional/extension.py | 2 +- python/paddle/nn/functional/input.py | 2 +- python/paddle/nn/functional/loss.py | 2 +- python/paddle/nn/functional/vision.py | 2 +- python/paddle/nn/initializer/Bilinear.py | 182 +++ python/paddle/nn/initializer/__init__.py | 13 +- python/paddle/nn/initializer/assign.py | 120 +- python/paddle/nn/initializer/constant.py | 65 +- python/paddle/nn/initializer/dirac.py | 2 +- python/paddle/nn/initializer/initializer.py | 159 +++ python/paddle/nn/initializer/kaiming.py | 188 ++- python/paddle/nn/initializer/normal.py | 193 ++- python/paddle/nn/initializer/orthogonal.py | 2 +- python/paddle/nn/initializer/uniform.py | 133 +- python/paddle/nn/initializer/xavier.py | 182 ++- python/paddle/nn/layer/rnn.py | 9 +- python/paddle/optimizer/optimizer.py | 9 +- python/paddle/static/nn/common.py | 14 +- python/paddle/static/nn/loss.py | 4 +- python/paddle/static/nn/metric.py | 5 +- python/paddle/tensor/array.py | 2 +- python/paddle/tensor/attribute.py | 2 +- python/paddle/tensor/creation.py | 8 +- .../paddle/tensor/layer_function_generator.py | 2 +- python/paddle/tensor/linalg.py | 2 +- python/paddle/tensor/logic.py | 2 +- python/paddle/tensor/manipulation.py | 3 +- python/paddle/tensor/math.py | 2 +- python/paddle/tensor/random.py | 2 +- python/paddle/tensor/stat.py | 2 +- python/paddle/vision/ops.py | 4 +- 199 files changed, 1927 insertions(+), 1925 deletions(-) create mode 100644 python/paddle/nn/initializer/Bilinear.py create mode 100644 python/paddle/nn/initializer/initializer.py mode change 100755 => 100644 python/paddle/tensor/logic.py diff --git a/python/paddle/common_ops_import.py b/python/paddle/common_ops_import.py index 91a3f49cdb..1ec54064eb 100644 --- a/python/paddle/common_ops_import.py +++ b/python/paddle/common_ops_import.py @@ -32,7 +32,6 @@ from paddle.fluid.framework import ( # noqa: F401 dygraph_only, in_dygraph_mode, ) -from paddle.fluid.initializer import Constant # noqa: F401 from paddle.fluid.layer_helper import LayerHelper # noqa: F401 from paddle.fluid.layers import fill_constant, utils # noqa: F401 from paddle.fluid.layers.layer_function_generator import ( # noqa: F401 diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py index efa94862b5..fbe391b45f 100755 --- a/python/paddle/distributed/fleet/base/distributed_strategy.py +++ b/python/paddle/distributed/fleet/base/distributed_strategy.py @@ -104,7 +104,6 @@ class DistributedJobInfo: self.job_info.strategy = dist_strategy -ReduceStrategyFluid = paddle.static.BuildStrategy.ReduceStrategy ReduceStrategyFleet = int @@ -261,7 +260,7 @@ class DistributedStrategy: for f in fields: value = getattr(self.strategy.build_strategy, f.name) if f.name == 'reduce_strategy': - value = ReduceStrategyFluid(value) + value = paddle.static.BuildStrategy.ReduceStrategy(value) setattr(build_strategy, f.name, value) return build_strategy diff --git a/python/paddle/distributed/fleet/layers/mpu/random.py b/python/paddle/distributed/fleet/layers/mpu/random.py index 7b89330d95..718c85e855 100644 --- a/python/paddle/distributed/fleet/layers/mpu/random.py +++ b/python/paddle/distributed/fleet/layers/mpu/random.py @@ -18,11 +18,11 @@ import numpy as np import paddle from paddle import _legacy_C_ops +from paddle.common_ops_import import Variable from paddle.fluid import core from paddle.fluid.data_feeder import check_variable_and_dtype from paddle.fluid.framework import in_dygraph_mode 
from paddle.framework import LayerHelper -from paddle.static import Variable __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py index 9dce0d540a..98d131822f 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py @@ -171,7 +171,7 @@ class DGCMomentumOptimizer(Optimizer): if is_new_var: helper.set_variable_initializer( counter, - initializer=paddle.fluid.initializer.Constant( + initializer=paddle.nn.initializer.ConstantInitializer( value=float(begin - 1), force_cpu=True ), ) @@ -194,7 +194,7 @@ class DGCMomentumOptimizer(Optimizer): if is_new_var: helper.set_variable_initializer( counter, - initializer=paddle.fluid.initializer.Constant( + initializer=paddle.nn.initializer.ConstantInitializer( value=float(value), force_cpu=True ), ) diff --git a/python/paddle/distributed/fleet/metrics/metric.py b/python/paddle/distributed/fleet/metrics/metric.py index 999ab6f0af..d2f72b0c7d 100644 --- a/python/paddle/distributed/fleet/metrics/metric.py +++ b/python/paddle/distributed/fleet/metrics/metric.py @@ -18,7 +18,7 @@ import math import numpy as np import paddle -from paddle.static import Variable +from paddle.common_ops_import import Variable __all__ = [] diff --git a/python/paddle/fluid/compiler.py b/python/paddle/fluid/compiler.py index e8393c63b1..609bfa3d93 100644 --- a/python/paddle/fluid/compiler.py +++ b/python/paddle/fluid/compiler.py @@ -586,7 +586,6 @@ class IpuDynamicPatcher: """ from ..fluid.dygraph.base import switch_to_static_graph from ..fluid import backward - from ..fluid.initializer import Constant from ..fluid.framework import device_guard import paddle @@ -645,7 +644,10 @@ class IpuDynamicPatcher: device = optimizer._get_device_for_param(param_name) with device_guard(device): optimizer.helper.set_variable_initializer( - var, initializer=Constant(value=0.0) + var, + initializer=paddle.nn.initializer.Constant( + value=0.0 + ), ) param_or_lr_tensor = scope.find_var( var_tmp.name diff --git a/python/paddle/fluid/contrib/layers/metric_op.py b/python/paddle/fluid/contrib/layers/metric_op.py index 414fcf5b6c..07d6b464dd 100755 --- a/python/paddle/fluid/contrib/layers/metric_op.py +++ b/python/paddle/fluid/contrib/layers/metric_op.py @@ -17,7 +17,6 @@ Contrib layers just related to metric. import warnings from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.initializer import Normal, Constant from paddle.fluid.framework import Variable from paddle.fluid.param_attr import ParamAttr from paddle.fluid.layers import tensor @@ -147,7 +146,10 @@ def ctr_metric_bundle(input, label, ins_tag_weight=None): local_ins_num, ]: helper.set_variable_initializer( - var, Constant(value=0.0, force_cpu=True) + var, + paddle.nn.initializer.ConstantInitializer( + value=0.0, force_cpu=True + ), ) helper.append_op( diff --git a/python/paddle/fluid/contrib/layers/nn.py b/python/paddle/fluid/contrib/layers/nn.py index d2aff8bfcf..9064e4f9f0 100644 --- a/python/paddle/fluid/contrib/layers/nn.py +++ b/python/paddle/fluid/contrib/layers/nn.py @@ -24,7 +24,6 @@ import paddle from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layers import utils from ... 
import unique_name -from paddle.fluid.initializer import Normal, Constant, NumpyArrayInitializer from paddle.fluid.data_feeder import ( check_variable_and_dtype, check_type, @@ -896,8 +895,10 @@ def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'): Examples: .. code-block:: python + import paddle import paddle.fluid as fluid import numpy as np + paddle.enable_static() x = fluid.data(name="x", shape=[None, 1], dtype="int32", lod_level=1) tree_info = [[0,0,0,1,2], [0,1,0,3,4],[0,1,0,5,6], @@ -908,7 +909,7 @@ def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'): child_nums = 2 child, leaf_mask = fluid.contrib.layers.tdm_child(x, node_nums, child_nums, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( tree_info_np))) place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -925,7 +926,7 @@ def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'): attr=helper.param_attr, shape=[node_nums, 3 + child_nums], dtype=dtype, - default_initializer=Constant(0), + default_initializer=paddle.nn.initializer.Constant(0), ) tree_info.stop_gradient = True @@ -1003,8 +1004,10 @@ def tdm_sampler( Examples: .. code-block:: python + import paddle import paddle.fluid as fluid import numpy as np + paddle.enable_static() x = fluid.data(name="x", shape=[None, 1], dtype="int32", lod_level=1) travel_list = [[1, 3], [1, 4], [2, 5], [2, 6]] # leaf node's travel path, shape(leaf_node_num, layer_num) layer_list_flat = [[1], [2], [3], [4], [5], [6]] # shape(node_nums, 1) @@ -1022,10 +1025,10 @@ def tdm_sampler( layer_node_num_list, leaf_node_num, tree_travel_attr=fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( travel_array)), tree_layer_attr=fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( layer_array)), output_positive=True, output_list=True, @@ -1089,7 +1092,7 @@ def tdm_sampler( attr=tree_travel_attr, shape=travel_shape, dtype=tree_dtype, - default_initializer=Constant(0), + default_initializer=paddle.nn.initializer.Constant(0), ) layer_shape = [node_nums, 1] @@ -1097,7 +1100,7 @@ def tdm_sampler( attr=tree_layer_attr, shape=layer_shape, dtype=tree_dtype, - default_initializer=Constant(0), + default_initializer=paddle.nn.initializer.Constant(0), ) out = helper.create_variable_for_type_inference(dtype=dtype) @@ -1640,7 +1643,7 @@ def fused_bn_add_act( attr=helper.param_attr, shape=param_shape, dtype=bn_param_dtype, - default_initializer=Constant(1.0), + default_initializer=paddle.nn.initializer.Constant(1.0), ) bias = helper.create_parameter( attr=helper.bias_attr, @@ -1650,7 +1653,9 @@ def fused_bn_add_act( ) mean = helper.create_parameter( attr=ParamAttr( - name=moving_mean_name, initializer=Constant(0.0), trainable=False + name=moving_mean_name, + initializer=paddle.nn.initializer.Constant(0.0), + trainable=False, ), shape=param_shape, dtype=bn_param_dtype, @@ -1659,7 +1664,7 @@ def fused_bn_add_act( variance = helper.create_parameter( attr=ParamAttr( name=moving_variance_name, - initializer=Constant(1.0), + initializer=paddle.nn.initializer.Constant(1.0), trainable=False, ), shape=param_shape, @@ -1723,13 +1728,16 @@ def pow2_decay_with_linear_warmup( helper = LayerHelper("pow2_decay_with_linear_warmup", **locals()) lr = helper.create_global_variable(persistable=True, dtype=dtype, shape=[1]) helper.set_variable_initializer( - lr, Constant(value=float(base_lr) 
/ warmup_steps) + lr, + paddle.nn.initializer.Constant(value=float(base_lr) / warmup_steps), ) step = helper.create_global_variable( persistable=True, dtype='int64', shape=[1] ) - helper.set_variable_initializer(step, Constant(value=0)) + helper.set_variable_initializer( + step, paddle.nn.initializer.Constant(value=0) + ) assert ( warmup_steps <= total_steps ), "warmup_steps cannot be larger than total_steps" diff --git a/python/paddle/fluid/evaluator.py b/python/paddle/fluid/evaluator.py index 472bcbd3ca..a4d80ecbfe 100644 --- a/python/paddle/fluid/evaluator.py +++ b/python/paddle/fluid/evaluator.py @@ -20,7 +20,6 @@ from . import layers from .framework import Program, Variable, program_guard from . import unique_name from .layer_helper import LayerHelper -from .initializer import Constant def _clone_var_(block, var): diff --git a/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py b/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py index 23f5a44fe1..9fc9182017 100644 --- a/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py +++ b/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py @@ -109,7 +109,7 @@ def model(): size=[dnn_input_dim, dnn_layer_dims[0]], param_attr=fluid.ParamAttr( name="deep_embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=True, ) @@ -121,7 +121,7 @@ def model(): size=dim, activation="relu", weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), name='dnn-fc-%d' % i, ) @@ -134,7 +134,7 @@ def model(): size=[lr_input_dim, 1], param_attr=fluid.ParamAttr( name="wide_embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=True, ) diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py index 38650856b0..6eb88d8f8e 100644 --- a/python/paddle/fluid/initializer.py +++ b/python/paddle/fluid/initializer.py @@ -30,1139 +30,24 @@ from .data_feeder import check_variable_and_dtype, check_type, check_dtype from paddle import _C_ops, _legacy_C_ops import paddle -__all__ = [ - 'Constant', - 'Uniform', - 'Normal', - 'TruncatedNormal', - 'Xavier', - 'Bilinear', - 'MSRA', - 'ConstantInitializer', - 'UniformInitializer', - 'NormalInitializer', - 'TruncatedNormalInitializer', - 'XavierInitializer', - 'BilinearInitializer', - 'MSRAInitializer', - 'NumpyArrayInitializer', - 'set_global_initializer', -] +__all__ = ['set_global_initializer'] _global_weight_initializer_ = None _global_bias_initializer_ = None -class Initializer: - """Base class for variable initializers - - Defines the common interface of variable initializers. - They add operations to the init program that are used - to initialize variables. Users should not use this class - directly, but need to use one of its implementations. 
- """ - - def __init__(self): - pass - - def __call__(self, param, block=None): - if not lazy_init_helper().state: - return self.forward(param, block) - - return self._lazy_init(param, block) - - def forward(self, param, block=None): - """Add corresponding initialization operations to the network""" - raise NotImplementedError() - - def _lazy_init(self, param, block=None): - """ - Apply lazy initialization - """ - assert in_dygraph_mode() - - def init_op_creator(forward, param, block): - new_var = param._to_static_var(True, block=block) - # Record initializer operator - with lazy_init_helper(): - forward(new_var, block) - - # Add hook function for initializing param in dygraph mode - param.set_init_func(functools.partial(self.forward, param, block)) - param._init_op_creator = functools.partial( - init_op_creator, self.forward, param - ) - - return param - - def _check_block(self, block): - if block is None: - block = default_main_program().global_block() - - return block - - def _compute_fans(self, var): - """Compute the fan_in and the fan_out for layers - - This method computes the fan_in and the fan_out - for neural network layers, if not specified. It is - not possible to perfectly estimate fan_in and fan_out. - This method will estimate it correctly for matrix multiply and - convolutions. - - Args: - var: variable for which fan_in and fan_out have to be computed - - Returns: - tuple of two integers (fan_in, fan_out) - """ - shape = var.shape - if not shape or len(shape) == 0: - fan_in = fan_out = 1 - elif len(shape) == 1: - fan_in = fan_out = shape[0] - elif len(shape) == 2: - # This is the case for simple matrix multiply - fan_in = shape[0] - fan_out = shape[1] - else: - # Assume this to be a convolutional kernel - # In PaddlePaddle, the shape of the kernel is like: - # [num_filters, num_filter_channels, ...] where the remaining - # dimensions are the filter_size - receptive_field_size = np.prod(shape[2:]) - fan_in = shape[1] * receptive_field_size - fan_out = shape[0] * receptive_field_size - - return (fan_in, fan_out) - - -class ConstantInitializer(Initializer): - """Implements the constant initializer - - Args: - value (float32): constant value to initialize the variable - - Examples: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - paddle.enable_static() - x = fluid.data(name="data", shape=[8, 32, 32], dtype="float32") - fc = paddle.static.nn.fc( - x, - size=10, - weight_attr=fluid.initializer.Constant(value=2.0)) - - """ - - def __init__(self, value=0.0, force_cpu=False): - assert value is not None - super().__init__() - self._value = value - self._force_cpu = force_cpu - - def forward(self, var, block=None): - """Initialize the input tensor with constant. - - Args: - var(Tensor): Tensor that needs to be initialized. - block(Block, optional): The block in which initialization ops - should be added. Used in static graph only, default None. 
- - Returns: - The initialization op - """ - block = self._check_block(block) - - assert isinstance(var, framework.Variable) or isinstance( - var, framework.EagerParamBase - ) - assert isinstance(block, framework.Block) - - if in_dygraph_mode(): - place = _current_expected_place() - if self._force_cpu: - place = core.CPUPlace() - _C_ops.full_( - var, var.shape, str(float(self._value)), var.dtype, place - ) - return None - else: - op = block.append_op( - type="fill_constant", - outputs={"Out": var}, - attrs={ - "shape": var.shape, - "dtype": int(var.dtype), - "value": float(self._value), - 'str_value': str(float(self._value)), - 'force_cpu': self._force_cpu, - }, - stop_gradient=True, - ) - - var.op = op - return op - - -class UniformInitializer(Initializer): - """Implements the random uniform distribution initializer - - Args: - low (float): lower boundary of the uniform distribution - high (float): upper boundary of the uniform distribution - seed (int): random seed - diag_num (int): the number of diagonal elements to initialize. - If set to 0, diagonal initialization will be not performed. - diag_step (int): Step size between two diagonal elements, - which is generally the width of the square matrix. - diag_val (float): the value of the diagonal element to be initialized, - default 1.0. It takes effect only if the diag_num is greater than 0. - - Examples: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - paddle.enable_static() - x = fluid.data(name='x', shape=[None, 1], dtype='float32') - fc = paddle.static.nn.fc(x, size=10, - weight_attr=fluid.initializer.Uniform(low=-0.5, high=0.5)) - """ - - def __init__( - self, low=-1.0, high=1.0, seed=0, diag_num=0, diag_step=0, diag_val=1.0 - ): - assert low is not None - assert high is not None - assert high >= low - assert seed is not None - assert diag_num is not None - assert diag_step is not None - assert diag_val is not None - if diag_num > 0 or diag_step > 0: - assert diag_num > 0 and diag_step > 0 - super().__init__() - self._low = low - self._high = high - self._seed = seed - self._diag_num = diag_num - self._diag_step = diag_step - self._diag_val = diag_val - - def forward(self, var, block=None): - """Initialize the input tensor with Uniform distribution. - - Args: - var(Tensor): Tensor that needs to be initialized. - block(Block, optional): The block in which initialization ops - should be added. Used in static graph only, default None. 
- - Returns: - The initialization op - """ - block = self._check_block(block) - - assert isinstance(block, framework.Block) - if not in_dygraph_mode(): - check_variable_and_dtype( - var, - "Out", - ["uint16", "float16", "float32", "float64"], - "uniform_random", - ) - - if self._seed == 0: - self._seed = block.program.random_seed - - # to be compatible of fp16 initializers - if var.dtype == VarDesc.VarType.FP16: - out_dtype = VarDesc.VarType.FP32 - out_var = block.create_var( - name=unique_name.generate( - ".".join(['uniform_random', var.name, 'tmp']) - ), - shape=var.shape, - dtype=out_dtype, - type=VarDesc.VarType.LOD_TENSOR, - persistable=False, - ) - else: - out_dtype = var.dtype - out_var = var - - if in_dygraph_mode(): - out_var = _C_ops.uniform( - var.shape, - out_dtype, - self._low, - self._high, - self._seed, - _current_expected_place(), - ) - if var.dtype == VarDesc.VarType.FP16: - var_tmp = _C_ops.cast(out_var, var.dtype) - var_tmp._share_underline_tensor_to(var) - else: - out_var._share_underline_tensor_to(var) - return None - else: - op = block.append_op( - type="uniform_random", - inputs={}, - outputs={"Out": out_var}, - attrs={ - "shape": var.shape, - "dtype": out_dtype, - "min": self._low, - "max": self._high, - "seed": self._seed, - "diag_num": self._diag_num, - "diag_step": self._diag_step, - "diag_val": self._diag_val, - }, - stop_gradient=True, - ) - - if var.dtype == VarDesc.VarType.FP16: - block.append_op( - type="cast", - inputs={"X": out_var}, - outputs={"Out": var}, - attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}, - ) - - var.op = op - return op - - -class NormalInitializer(Initializer): - """Implements the Random Normal(Gaussian) distribution initializer - - Args: - loc (float): mean of the normal distribution - scale (float): standard deviation of the normal distribution - seed (int): random seed - - Examples: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - paddle.enable_static() - x = fluid.data(name="data", shape=[None, 32, 32], dtype="float32") - fc = paddle.static.nn.fc(x, size=10, - weight_attr=fluid.initializer.Normal(loc=0.0, scale=2.0)) - - """ - - def __init__(self, loc=0.0, scale=1.0, seed=0): - assert loc is not None - assert scale is not None - assert seed is not None - super().__init__() - self._mean = loc - self._std_dev = scale - self._seed = seed - - def forward(self, var, block=None): - """Initialize the input tensor with Normal distribution. - - Args: - var(Tensor): Tensor that needs to be initialized. - block(Block, optional): The block in which initialization ops - should be added. Used in static graph only, default None. 
- - Returns: - The initialization op - """ - block = self._check_block(block) - - assert isinstance(block, framework.Block) - - if self._seed == 0: - self._seed = block.program.random_seed - - if in_dygraph_mode(): - place = _current_expected_place() - out_var = _C_ops.gaussian( - var.shape, - self._mean, - self._std_dev, - self._seed, - var.dtype, - place, - ) - out_var._share_underline_tensor_to(var) - return None - - else: - check_variable_and_dtype( - var, - "Out", - ["uint16", "float16", "float32", "float64"], - "guassian_random", - ) - op = block.append_op( - type="gaussian_random", - outputs={"Out": var}, - attrs={ - "shape": var.shape, - "dtype": var.dtype, - "mean": self._mean, - "std": self._std_dev, - "seed": self._seed, - "use_mkldnn": False, - }, - stop_gradient=True, - ) - var.op = op - return op - - -class TruncatedNormalInitializer(Initializer): - """Implements the Random TruncatedNormal(Gaussian) distribution initializer - - Args: - loc (float): mean of the normal distribution - scale (float): standard deviation of the normal distribution - seed (int): random seed - - Examples: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - paddle.enable_static() - x = fluid.data(name='x', shape=[None, 1], dtype='float32') - fc = paddle.static.nn.fc(x, size=10, - weight_attr=fluid.initializer.TruncatedNormal(loc=0.0, scale=2.0)) - """ - - def __init__(self, loc=0.0, scale=1.0, seed=0): - assert loc is not None - assert scale is not None - assert seed is not None - super().__init__() - self._mean = loc - self._std_dev = scale - self._seed = seed - - def forward(self, var, block=None): - """Initialize the input tensor with TruncatedNormal distribution. - - Args: - var(Tensor): Tensor that needs to be initialized. - block(Block, optional): The block in which initialization ops - should be added. Used in static graph only, default None. 
- - Returns: - The initialization op - """ - block = self._check_block(block) - - assert isinstance(var, framework.Variable) - assert isinstance(block, framework.Block) - - if self._seed == 0: - self._seed = block.program.random_seed - - # to be compatible of fp16 initalizers - if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: - out_dtype = VarDesc.VarType.FP32 - out_var = block.create_var( - name=unique_name.generate( - ".".join(['truncated_gaussian_random', var.name, 'tmp']) - ), - shape=var.shape, - dtype=out_dtype, - type=VarDesc.VarType.LOD_TENSOR, - persistable=False, - ) - else: - out_dtype = var.dtype - out_var = var - - if in_dygraph_mode(): - out_var = _C_ops.truncated_gaussian_random( - var.shape, - self._mean, - self._std_dev, - self._seed, - out_dtype, - _current_expected_place(), - ) - if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: - var_tmp = _C_ops.cast(out_var, var.dtype) - var_tmp._share_underline_tensor_to(var) - else: - out_var._share_underline_tensor_to(var) - return None - - else: - op = block.append_op( - type="truncated_gaussian_random", - outputs={"Out": out_var}, - attrs={ - "shape": var.shape, - "dtype": out_dtype, - "mean": self._mean, - "std": self._std_dev, - "seed": self._seed, - }, - stop_gradient=True, - ) - - if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: - block.append_op( - type="cast", - inputs={"X": out_var}, - outputs={"Out": var}, - attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}, - ) - var.op = op - return op - - -class XavierInitializer(Initializer): - r""" - This class implements the Xavier weight initializer from the paper - `Understanding the difficulty of training deep feedforward neural - networks `_ - by Xavier Glorot and Yoshua Bengio. - - This initializer is designed to keep the scale of the gradients - approximately same in all the layers. In case of Uniform distribution, - the range is [-x, x], where - - .. math:: - - x = \sqrt{\\frac{6.0}{fan\_in + fan\_out}} - - In case of Normal distribution, the mean is 0 and the standard deviation - is - - .. math:: - - \sqrt{\\frac{2.0}{fan\_in + fan\_out}} - - - Args: - uniform (bool,default True): whether to use uniform ,if False use normal distribution - fan_in (float,default None): fan_in for Xavier initialization. If None, it is - inferred from the variable. - fan_out (float,default None): fan_out for Xavier initialization. If None, it is - inferred from the variable. - seed (int): random seed - - Note: - It is recommended to set fan_in and fan_out to None for most cases. - - Examples: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - paddle.enable_static() - queries = fluid.data(name='x', shape=[None,1], dtype='float32') - fc = paddle.static.nn.fc( - x=queries, size=10, - weight_attr=fluid.initializer.Xavier(uniform=False)) - - """ - - def __init__(self, uniform=True, fan_in=None, fan_out=None, seed=0): - assert uniform is not None - assert seed is not None - super().__init__() - self._uniform = uniform - self._fan_in = fan_in - self._fan_out = fan_out - self._seed = seed - - def forward(self, var, block=None): - """Initialize the input tensor with Xavier initialization. - - Args: - var(Tensor): Tensor that needs to be initialized. - block(Block, optional): The block in which initialization ops - should be added. Used in static graph only, default None. 
- - Returns: - The initialization op - """ - block = self._check_block(block) - - assert isinstance(block, framework.Block) - if not in_dygraph_mode(): - check_variable_and_dtype( - var, - "Out", - ["uint16", "float16", "float32", "float64"], - "xavier_init", - ) - - f_in, f_out = self._compute_fans(var) - - # If fan_in and fan_out are passed, use them - fan_in = f_in if self._fan_in is None else self._fan_in - fan_out = f_out if self._fan_out is None else self._fan_out - - if self._seed == 0: - self._seed = block.program.random_seed - - # to be compatible of fp16 initalizers - if var.dtype == VarDesc.VarType.FP16 or ( - var.dtype == VarDesc.VarType.BF16 and not self._uniform - ): - out_dtype = VarDesc.VarType.FP32 - out_var = block.create_var( - name=unique_name.generate( - ".".join(['xavier_init', var.name, 'tmp']) - ), - shape=var.shape, - dtype=out_dtype, - type=VarDesc.VarType.LOD_TENSOR, - persistable=False, - ) - else: - out_dtype = var.dtype - out_var = var - - if in_dygraph_mode(): - if self._uniform: - limit = math.sqrt(6.0 / float(fan_in + fan_out)) - out_var = _C_ops.uniform( - out_var.shape, - out_dtype, - -limit, - limit, - self._seed, - _current_expected_place(), - ) - else: - std = math.sqrt(2.0 / float(fan_in + fan_out)) - - place = _current_expected_place() - out_var = _C_ops.gaussian( - out_var.shape, 0.0, std, self._seed, out_dtype, place - ) - - if var.dtype == VarDesc.VarType.FP16 or ( - var.dtype == VarDesc.VarType.BF16 and not self._uniform - ): - var_tmp = _C_ops.cast(out_var, var.dtype) - var_tmp._share_underline_tensor_to(var) - else: - out_var._share_underline_tensor_to(var) - return None - else: - if self._uniform: - limit = math.sqrt(6.0 / float(fan_in + fan_out)) - op = block.append_op( - type="uniform_random", - inputs={}, - outputs={"Out": out_var}, - attrs={ - "shape": out_var.shape, - "dtype": out_dtype, - "min": -limit, - "max": limit, - "seed": self._seed, - }, - stop_gradient=True, - ) - else: - std = math.sqrt(2.0 / float(fan_in + fan_out)) - op = block.append_op( - type="gaussian_random", - outputs={"Out": out_var}, - attrs={ - "shape": out_var.shape, - "dtype": out_var.dtype, - "mean": 0.0, - "std": std, - "seed": self._seed, - }, - stop_gradient=True, - ) - - if var.dtype == VarDesc.VarType.FP16 or ( - var.dtype == VarDesc.VarType.BF16 and not self._uniform - ): - block.append_op( - type="cast", - inputs={"X": out_var}, - outputs={"Out": var}, - attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}, - ) - - var.op = op - return op - - -class MSRAInitializer(Initializer): - r"""Implements the MSRA initializer a.k.a. Kaiming Initializer - - This class implements the weight initialization from the paper - `Delving Deep into Rectifiers: Surpassing Human-Level Performance on - ImageNet Classification `_ - by Kaiming He, Xiangyu Zhang, Shaoqing Ren and Jian Sun. This is a - robust initialization method that particularly considers the rectifier - nonlinearities. In case of Uniform distribution, the range is [-x, x], where - - .. math:: - - x = gain \times \sqrt{\frac{3}{fan\_in}} - - In case of Normal distribution, the mean is 0 and the standard deviation - is - - .. math:: - - \frac{gain}{\sqrt{{fan\_in}}} - - Args: - uniform (bool, optional): whether to use uniform or normal distribution - fan_in (float32|None, optional): fan_in (in_features) of trainable Tensor, If None, it will be infered automaticly. If you don't want to use in_features of the Tensor, you can set the value of 'fan_in' smartly by yourself. default is None. 
- seed (int32, optional): random seed. - negative_slope (float, optional): negative_slope (only used with leaky_relu). default is 0.0. - nonlinearity(str, optional): the non-linear function. default is relu. - - Note: - It is recommended to set fan_in to None for most cases. - - Examples: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - paddle.enable_static() - x = fluid.data(name="data", shape=[8, 32, 32], dtype="float32") - fc = paddle.static.nn.fc(x, size=10, - weight_attr=fluid.initializer.MSRA(uniform=False)) - +def _global_weight_initializer(): """ - - def __init__( - self, - uniform=True, - fan_in=None, - seed=0, - negative_slope=0, - nonlinearity='relu', - ): - """Constructor for MSRAInitializer""" - assert uniform is not None - assert seed is not None - super().__init__() - self._uniform = uniform - self._fan_in = fan_in - self._seed = seed - self._negative_slope = negative_slope - self._nonlinearity = nonlinearity - - def forward(self, var, block=None): - """Initialize the input tensor with MSRA initialization. - - Args: - var(Tensor): Tensor that needs to be initialized. - block(Block, optional): The block in which initialization ops - should be added. Used in static graph only, default None. - - Returns: - The initialization op - """ - block = self._check_block(block) - - assert isinstance(var, framework.Variable) - assert isinstance(block, framework.Block) - f_in, f_out = self._compute_fans(var) - - # If fan_in is passed, use it - fan_in = f_in if self._fan_in is None else self._fan_in - - if fan_in == 0: - if self._fan_in is None: - raise ValueError( - "The in_features of the Tensor contain zero, can not initialize the Tensor." - ) - else: - raise ValueError( - "fan_in should not be zero, can not initialize the Tensor." 
- ) - - if self._seed == 0: - self._seed = block.program.random_seed - - # to be compatible of fp16 initalizers - if var.dtype == VarDesc.VarType.FP16 or ( - var.dtype == VarDesc.VarType.BF16 and not self._uniform - ): - out_dtype = VarDesc.VarType.FP32 - out_var = block.create_var( - name=unique_name.generate( - ".".join(['masra_init', var.name, 'tmp']) - ), - shape=var.shape, - dtype=out_dtype, - type=VarDesc.VarType.LOD_TENSOR, - persistable=False, - ) - else: - out_dtype = var.dtype - out_var = var - - if in_dygraph_mode(): - if self._uniform: - gain = calculate_gain(self._nonlinearity, self._negative_slope) - limit = gain * math.sqrt(3.0 / float(fan_in)) - out_var = _C_ops.uniform( - var.shape, - out_dtype, - -limit, - limit, - self._seed, - _current_expected_place(), - ) - else: - gain = calculate_gain(self._nonlinearity, self._negative_slope) - std = gain / math.sqrt(float(fan_in)) - place = _current_expected_place() - out_var = _C_ops.gaussian( - out_var.shape, 0.0, std, self._seed, out_dtype, place - ) - - if var.dtype == VarDesc.VarType.FP16 or ( - var.dtype == VarDesc.VarType.BF16 and not self._uniform - ): - var_tmp = _C_ops.cast(out_var, var.dtype) - var_tmp._share_underline_tensor_to(var) - else: - out_var._share_underline_tensor_to(var) - return None - else: - if self._uniform: - gain = calculate_gain(self._nonlinearity, self._negative_slope) - limit = gain * math.sqrt(3.0 / float(fan_in)) - op = block.append_op( - type="uniform_random", - inputs={}, - outputs={"Out": out_var}, - attrs={ - "shape": out_var.shape, - "dtype": int(out_dtype), - "min": -limit, - "max": limit, - "seed": self._seed, - }, - stop_gradient=True, - ) - - else: - gain = calculate_gain(self._nonlinearity, self._negative_slope) - std = gain / math.sqrt(float(fan_in)) - op = block.append_op( - type="gaussian_random", - outputs={"Out": out_var}, - attrs={ - "shape": out_var.shape, - "dtype": int(out_dtype), - "mean": 0.0, - "std": std, - "seed": self._seed, - }, - stop_gradient=True, - ) - - if var.dtype == VarDesc.VarType.FP16 or ( - var.dtype == VarDesc.VarType.BF16 and not self._uniform - ): - block.append_op( - type="cast", - inputs={"X": out_var}, - outputs={"Out": var}, - attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}, - ) - - var.op = op - return op - - -class BilinearInitializer(Initializer): + Return the global weight initializer, The user doesn't need to use it. """ - This initializer can be used in transposed convolution operator to - act as upsampling. Users can upsample a feature map with shape of - (B, C, H, W) by any integer factor. The usage is: - - Examples: - - .. code-block:: python - - import math - - import paddle - import paddle.nn as nn - from paddle.regularizer import L2Decay - - factor = 2 - C = 2 - B = 8 - H = W = 32 - w_attr = paddle.ParamAttr(learning_rate=0., - regularizer=L2Decay(0.), - initializer=nn.initializer.Bilinear()) - data = paddle.rand([B, 3, H, W], dtype='float32') - conv_up = nn.Conv2DTranspose(3, - out_channels=C, - kernel_size=2 * factor - factor % 2, - padding=int( - math.ceil((factor - 1) / 2.)), - stride=factor, - weight_attr=w_attr, - bias_attr=False) - x = conv_up(data) + return _global_weight_initializer_ - Where, `out_channels=C` and `groups=C` means this is channel-wise transposed - convolution. The filter shape will be (C, 1, K, K) where K is `kernel_size`, - This initializer will set a (K, K) interpolation kernel for every channel - of the filter identically. 
The resulting shape of the output feature map - will be (B, C, factor * H, factor * W). Note that the learning rate and the - weight decay are set to 0 in order to keep coefficient values of bilinear - interpolation unchanged during training. +def _global_bias_initializer(): """ - - def __init__(self): - """Constructor for BilinearInitializer.""" - super().__init__() - - def forward(self, var, block=None): - """Initialize the input tensor with Bilinear initialization. - - Args: - var(Tensor): Tensor that needs to be initialized. - block(Block, optional): The block in which initialization ops - should be added. Used in static graph only, default None. - - Returns: - The initialization op - """ - block = self._check_block(block) - - if not isinstance(var, framework.Variable): - raise ValueError("var must be framework.Variable.") - - if not isinstance(block, framework.Block): - raise ValueError("block must be framework.Block.") - - shape = var.shape - if len(shape) != 4: - raise ValueError("the length of shape must be 4.") - if shape[2] != shape[3]: - raise ValueError("shape[2] must be equal to shape[3].") - - weight = np.zeros(np.prod(var.shape), dtype='float32') - size = shape[3] - # factor - f = np.ceil(size / 2.0) - # center - c = (2 * f - 1 - f % 2) / (2.0 * f) - for i in range(np.prod(shape)): - x = i % size - y = (i / size) % size - weight[i] = (1 - abs(x / f - c)) * (1 - abs(y / f - c)) - weight = np.reshape(weight, shape) - - # to be compatible of fp16 initalizers - if var.dtype in [ - VarDesc.VarType.FP16, - VarDesc.VarType.BF16, - VarDesc.VarType.FP64, - ]: - out_dtype = VarDesc.VarType.FP32 - out_var = block.create_var( - name=unique_name.generate( - ".".join(['bilinear_init', var.name, 'tmp']) - ), - shape=var.shape, - dtype=out_dtype, - type=VarDesc.VarType.LOD_TENSOR, - persistable=False, - ) - else: - out_dtype = var.dtype - out_var = var - - if out_dtype == VarDesc.VarType.FP32: - value_name = "fp32_values" - values = [float(v) for v in weight.flat] - else: - raise TypeError("Unsupported dtype %s", var.dtype) - - if np.prod(shape) > 1024 * 1024: - raise ValueError("The size of input is too big. ") - - if in_dygraph_mode(): - _C_ops.assign_value_( - out_var, - list(shape), - out_dtype, - values, - _current_expected_place(), - ) - if var.dtype in [ - VarDesc.VarType.FP16, - VarDesc.VarType.BF16, - VarDesc.VarType.FP64, - ]: - var_tmp = _C_ops.cast(out_var, var.dtype) - var_tmp._share_underline_tensor_to(var) - else: - out_var._share_underline_tensor_to(var) - return None - else: - op = block.append_op( - type='assign_value', - outputs={'Out': [out_var]}, - attrs={ - 'dtype': out_dtype, - 'shape': list(shape), - value_name: values, - }, - ) - - if var.dtype in [ - VarDesc.VarType.FP16, - VarDesc.VarType.BF16, - VarDesc.VarType.FP64, - ]: - block.append_op( - type="cast", - inputs={"X": out_var}, - outputs={"Out": var}, - attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}, - ) - - var.op = op - return op - - -class NumpyArrayInitializer(Initializer): - """Init an parameter with an numpy array - This op initialize the variable by numpy array. - - Args: - value (numpy): numpy array to initialize the variable - - Returns: - A Tensor variable initialized by numpy. - - Examples: - .. 
code-block:: python - - import paddle - import paddle.fluid as fluid - import numpy - paddle.enable_static() - x = fluid.data(name="x", shape=[2, 1], dtype='float32') - fc = paddle.static.nn.fc(x, size=10, - weight_attr=fluid.initializer.NumpyArrayInitializer(numpy.array([1,2]))) + Return the global weight initializer, The user doesn't need to use it. """ - - def __init__(self, value): - import numpy - - assert isinstance(value, numpy.ndarray) - super().__init__() - self._value = value - - def forward(self, var, block=None): - """Initialize the input tensor with Numpy array. - - Args: - var(Tensor): Tensor that needs to be initialized. - block(Block, optional): The block in which initialization ops - should be added. Used in static graph only, default None. - - Returns: - The initialization op - """ - block = self._check_block(block) - - assert isinstance(var, framework.Variable) - assert isinstance(block, framework.Block) - - # to be compatible of fp16 initalizers - if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: - out_dtype = VarDesc.VarType.FP32 - np_value = self._value.astype("float32") - out_var = block.create_var( - name=unique_name.generate( - ".".join(['numpy_array_init', var.name, 'tmp']) - ), - shape=var.shape, - dtype=out_dtype, - type=VarDesc.VarType.LOD_TENSOR, - persistable=False, - ) - else: - out_var = var - out_dtype = var.dtype - np_value = self._value - - if out_dtype == VarDesc.VarType.FP32: - value_name = "fp32_values" - values = [float(v) for v in np_value.flat] - elif out_dtype == VarDesc.VarType.INT32: - value_name = "int32_values" - values = [int(v) for v in np_value.flat] - else: - raise ValueError("Unsupported dtype %s", self._value.dtype) - if self._value.size > 1024 * 1024 * 1024: - raise ValueError( - "The size of input is too big. Please consider " - "saving it to file and 'load_op' to load it" - ) - - if in_dygraph_mode(): - _C_ops.assign_value_( - out_var, - list(self._value.shape), - out_dtype, - values, - _current_expected_place(), - ) - if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: - var_tmp = _C_ops.cast(out_var, var.dtype) - var_tmp._share_underline_tensor_to(var) - else: - out_var._share_underline_tensor_to(var) - return None - else: - op = block.append_op( - type='assign_value', - outputs={'Out': out_var}, - attrs={ - 'dtype': out_dtype, - 'shape': list(self._value.shape), - value_name: values, - }, - stop_gradient=True, - ) - - if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: - block.append_op( - type="cast", - inputs={"X": out_var}, - outputs={"Out": var}, - attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}, - ) - - var.op = op - return op + return _global_bias_initializer_ def set_global_initializer(weight_init, bias_init=None): @@ -1218,7 +103,7 @@ def set_global_initializer(weight_init, bias_init=None): check_type( weight_init, 'weight_init', - (Initializer, type(None)), + (paddle.nn.initializer.Initializer, type(None)), 'set_global_initializer', ) global _global_weight_initializer_ @@ -1227,93 +112,8 @@ def set_global_initializer(weight_init, bias_init=None): check_type( bias_init, 'bias_init', - (Initializer, type(None)), + (paddle.nn.initializer.Initializer, type(None)), 'set_global_initializer', ) global _global_bias_initializer_ _global_bias_initializer_ = bias_init - - -def _global_weight_initializer(): - """ - Return the global weight initializer, The user doesn't need to use it. 
- """ - return _global_weight_initializer_ - - -def _global_bias_initializer(): - """ - Return the global weight initializer, The user doesn't need to use it. - """ - return _global_bias_initializer_ - - -def calculate_gain(nonlinearity, param=None): - """ - Get the recommended ``gain`` value of some nonlinearity function. ``gain`` value can be used in some - ``paddle.nn.initializer`` api to adjust the initialization value. - - Args: - nonlinearity(str): name of nonlinearity activation function. If it is a linear function, such as: - `linear/conv1d/conv2d/conv3d/conv1d_transpose/conv2d_transpose/conv3d_transpose` , 1.0 will be returned. - param(bool|int|float, optional): optional parameter for somme nonlinearity function. Now, it only applies to - 'leaky_relu'. Default: None, it will be calculated as 0.01 in the formula. - - Returns: - A float value, which is the recommended gain for this nonlinearity function. - - Examples: - .. code-block:: python - - import paddle - gain = paddle.nn.initializer.calculate_gain('tanh') # 5.0 / 3 - gain = paddle.nn.initializer.calculate_gain('leaky_relu', param=1.0) # 1.0 = math.sqrt(2.0 / (1+param^2)) - initializer = paddle.nn.initializer.Orthogonal(gain) - - """ - if param is None: - param = 0.01 - else: - assert isinstance(param, (bool, int, float)) - param = float(param) - recommended_gain = { - 'sigmoid': 1, - 'linear': 1, - 'conv1d': 1, - 'conv2d': 1, - 'conv3d': 1, - 'conv1d_transpose': 1, - 'conv2d_transpose': 1, - 'conv3d_transpose': 1, - 'tanh': 5.0 / 3, - 'relu': math.sqrt(2.0), - 'leaky_relu': math.sqrt(2.0 / (1 + param**2)), - 'selu': 3.0 / 4, - } - if nonlinearity in recommended_gain.keys(): - return recommended_gain[nonlinearity] - else: - raise ValueError( - "nonlinearity function {} is not suppported now.".format( - nonlinearity - ) - ) - - -# We short the class name, since users will use the initializer with the package -# name. The sample code: -# -# import paddle -# import paddle.fluid as fluid -# -# hidden = paddle.static.nn.fc(..., -# weight_attr=ParamAttr(fluid.initializer.Xavier())) -# -# It is no need to add an `Initializer` as the class suffix -Constant = ConstantInitializer -Uniform = UniformInitializer -Normal = NormalInitializer -TruncatedNormal = TruncatedNormalInitializer -Xavier = XavierInitializer -MSRA = MSRAInitializer -Bilinear = BilinearInitializer diff --git a/python/paddle/fluid/install_check.py b/python/paddle/fluid/install_check.py index 241dd71e20..ce93a25cce 100644 --- a/python/paddle/fluid/install_check.py +++ b/python/paddle/fluid/install_check.py @@ -22,7 +22,6 @@ from .framework import ( cpu_places, ) from .param_attr import ParamAttr -from .initializer import Constant from . import layers from . import backward from .dygraph import Layer @@ -42,7 +41,9 @@ class SimpleLayer(Layer): self._linear1 = paddle.nn.Linear( input_size, 3, - weight_attr=ParamAttr(initializer=Constant(value=0.1)), + weight_attr=ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.1) + ), ) def forward(self, inputs): diff --git a/python/paddle/fluid/layer_helper.py b/python/paddle/fluid/layer_helper.py index 9c3de1ba49..0342017822 100644 --- a/python/paddle/fluid/layer_helper.py +++ b/python/paddle/fluid/layer_helper.py @@ -13,7 +13,7 @@ # limitations under the License. import copy - +import paddle from .framework import ( Parameter, dtype_is_floating, @@ -22,7 +22,6 @@ from .framework import ( _global_flags, ) from . 
import unique_name -from paddle.fluid.initializer import Constant, Xavier from .param_attr import ParamAttr from . import core @@ -178,10 +177,10 @@ class LayerHelper(LayerHelperBase): # TODO (jiabin): should we remove this since it has never be used def _get_default_initializer(self, dtype): if dtype is None or dtype_is_floating(dtype) is True: - return Xavier() + return paddle.nn.initializer.XavierUniform() else: # For integer and boolean types, initialize with all zeros - return Constant() + return paddle.nn.initializer.Constant() # TODO (jiabin): reconstruct this in LayerObjHelper and avoid dependency of kwargs def is_instance(self, param_name, cls): diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py index 994fc98038..eb4d227f91 100644 --- a/python/paddle/fluid/layers/io.py +++ b/python/paddle/fluid/layers/io.py @@ -92,7 +92,7 @@ class ListenAndServ: shape=[32, 32], dtype='float32', name="X") - fluid.initializer.Constant(value=1.0)(x, main.global_block()) + paddle.nn.initializer.Constant(value=1.0)(x, main.global_block()) paddle.scale(x=x, scale=10.0, out=out_var) exe = fluid.Executor(place) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index fa0f49d01b..1dd819df41 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -22,7 +22,6 @@ import numpy as np import paddle from ..layer_helper import LayerHelper -from ..initializer import Normal, Constant from ..framework import ( Variable, OpProtoHolder, @@ -240,7 +239,7 @@ def embedding( w_param_attrs = fluid.ParamAttr( name="emb_weight", learning_rate=0.5, - initializer=fluid.initializer.NumpyArrayInitializer(weight_data), + initializer=paddle.nn.initializer.Assign(weight_data), trainable=True) emb_2 = fluid.layers.embedding(input=data, size=(128, 100), param_attr=w_param_attrs, dtype='float32') """ @@ -673,7 +672,10 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1): ) if is_new_var: helper.set_variable_initializer( - counter, initializer=Constant(value=begin - 1, force_cpu=True) + counter, + initializer=paddle.nn.initializer.ConstantInitializer( + value=begin - 1, force_cpu=True + ), ) helper.main_program.global_block()._prepend_op( type='increment', diff --git a/python/paddle/fluid/metrics.py b/python/paddle/fluid/metrics.py index 5d702b8e52..b04611db66 100644 --- a/python/paddle/fluid/metrics.py +++ b/python/paddle/fluid/metrics.py @@ -19,7 +19,6 @@ import numpy as np import copy from .layer_helper import LayerHelper -from .initializer import Constant from . import unique_name from .framework import Program, Variable, program_guard from . 
import layers diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index c5aa80c749..d7ab914f80 100755 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -39,7 +39,6 @@ from .backward import ( _get_no_grad_set_name, ) from .framework import program_guard -from .initializer import Constant from .layer_helper import LayerHelper from .dygraph import base as imperative_base from .dygraph import no_grad @@ -397,7 +396,8 @@ class Optimizer: lr_value = float(self._learning_rate()) self.helper.set_variable_initializer( - lr_var, initializer=Constant(value=lr_value) + lr_var, + initializer=paddle.nn.initializer.Constant(value=lr_value), ) return @@ -713,7 +713,10 @@ class Optimizer: device = self._get_device_for_param(param.name) with device_guard(device): self.helper.set_variable_initializer( - var, initializer=Constant(value=float(fill_value)) + var, + initializer=paddle.nn.initializer.Constant( + value=float(fill_value) + ), ) if in_dygraph_mode(): @@ -774,7 +777,10 @@ class Optimizer: device = 'cpu' with device_guard(device): self.helper.set_variable_initializer( - var, initializer=Constant(value=float(fill_value)) + var, + initializer=paddle.nn.initializer.Constant( + value=float(fill_value) + ), ) if in_dygraph_mode(): @@ -1225,10 +1231,12 @@ class Optimizer: # NOTE(zhiqiu): the initializer should be set after coalesce_tensor op, # so the shape of flatten_param and flatten_grad will be inferred. self.helper.set_variable_initializer( - flatten_param, initializer=Constant(0.0) + flatten_param, + initializer=paddle.nn.initializer.Constant(0.0), ) self.helper.set_variable_initializer( - flatten_grad, initializer=Constant(0.0) + flatten_grad, + initializer=paddle.nn.initializer.Constant(0.0), ) return [(flatten_param, flatten_grad)] diff --git a/python/paddle/fluid/param_attr.py b/python/paddle/fluid/param_attr.py index f251a654a9..6fdadd7904 100644 --- a/python/paddle/fluid/param_attr.py +++ b/python/paddle/fluid/param_attr.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .initializer import Initializer, Xavier, Constant +import paddle from .regularizer import WeightDecayRegularizer from paddle.fluid.data_feeder import check_type @@ -88,7 +88,10 @@ class ParamAttr: check_type(do_model_average, "do_model_average", (bool), "ParamAttr") check_type(need_clip, "need_clip", (bool), "ParamAttr") check_type( - initializer, "initializer", (Initializer, type(None)), "ParamAttr" + initializer, + "initializer", + (paddle.nn.initializer.Initializer, type(None)), + "ParamAttr", ) check_type( regularizer, @@ -139,7 +142,7 @@ class ParamAttr: Returns: None. """ - self._set_default_initializer(Xavier()) + self._set_default_initializer(paddle.nn.initializer.XavierUniform()) def _set_default_bias_initializer(self): """ @@ -151,7 +154,7 @@ class ParamAttr: Returns: None. 
""" - self._set_default_initializer(Constant(0.0)) + self._set_default_initializer(paddle.nn.initializer.Constant(0.0)) @staticmethod def _to_attr(arg): @@ -177,7 +180,7 @@ class ParamAttr: return arg elif isinstance(arg, str): return ParamAttr(name=arg) - elif isinstance(arg, Initializer): + elif isinstance(arg, paddle.nn.initializer.Initializer): return ParamAttr(initializer=arg) elif isinstance(arg, WeightDecayRegularizer): return ParamAttr(regularizer=arg) diff --git a/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py b/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py index 593d79998a..80ebe78963 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py @@ -36,7 +36,6 @@ from paddle.distributed.auto_parallel.utils import ( save_distributed_checkpoint, ) from paddle.distributed.fleet import auto -from paddle.fluid.initializer import NumpyArrayInitializer paddle.enable_static() _global_parallel_strategy = None @@ -55,8 +54,12 @@ class MLPLayer(nn.Layer): np.random.seed(2021) arr0 = np.random.normal(0, 0.02, size=(d_model, dim_feedforward)) arr1 = np.random.normal(0, 0.02, size=(d_model, dim_feedforward)) - weight_attr0 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr0)) - weight_attr1 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr1)) + weight_attr0 = paddle.ParamAttr( + initializer=paddle.nn.initializer.Assign(arr0) + ) + weight_attr1 = paddle.ParamAttr( + initializer=paddle.nn.initializer.Assign(arr1) + ) bias_attr = None self.linear0 = nn.Linear( d_model, dim_feedforward, weight_attr0, bias_attr=bias_attr diff --git a/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py b/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py index 875536af57..1cb2a3e9bf 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py @@ -30,7 +30,6 @@ from paddle.distributed.auto_parallel.utils import ( save_distributed_checkpoint, ) from paddle.distributed.fleet import auto -from paddle.fluid.initializer import NumpyArrayInitializer paddle.enable_static() _global_parallel_strategy = None @@ -48,7 +47,9 @@ class MLPLayer(nn.Layer): dim_feedforward = intermediate_size np.random.seed(2021) arr = np.random.normal(0, 0.02, size=(d_model, dim_feedforward)) - weight_attr = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr)) + weight_attr = paddle.ParamAttr( + initializer=paddle.nn.initializer.Assign(arr) + ) bias_attr = None self.linear0 = nn.Linear( diff --git a/python/paddle/fluid/tests/unittests/collective/column_parallel_linear_api.py b/python/paddle/fluid/tests/unittests/collective/column_parallel_linear_api.py index 59eee4cfee..c1ed3175e1 100644 --- a/python/paddle/fluid/tests/unittests/collective/column_parallel_linear_api.py +++ b/python/paddle/fluid/tests/unittests/collective/column_parallel_linear_api.py @@ -38,15 +38,11 @@ class TestColumnParallelLinearAPI(TestCollectiveAPIRunnerBase): paddle.distributed.broadcast(data, src=0) if rank == 0: param_attr = paddle.fluid.ParamAttr( - initializer=paddle.fluid.initializer.NumpyArrayInitializer( - np_array[:, 0:8] - ), + initializer=paddle.nn.initializer.Assign(np_array[:, 0:8]), ) else: param_attr = paddle.fluid.ParamAttr( - initializer=paddle.fluid.initializer.NumpyArrayInitializer( - np_array[:, 8:16] - ), + initializer=paddle.nn.initializer.Assign(np_array[:, 8:16]), ) linear_out = paddle.distributed.split( diff 
--git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py index a4d20264e7..ca4ad63066 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py @@ -242,10 +242,10 @@ class PrePostProcessLayer(Layer): self._layer_norm = paddle.nn.LayerNorm( normalized_shape=d_model, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(1.0) + initializer=paddle.nn.initializer.Constant(1.0) ), bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(0.0) + initializer=paddle.nn.initializer.Constant(0.0) ), ) @@ -513,7 +513,9 @@ class PrepareEncoderDecoderLayer(Layer): sparse=is_sparse, weight_attr=fluid.ParamAttr( name=word_emb_param_name, - initializer=fluid.initializer.Normal(0.0, src_emb_dim**-0.5), + initializer=paddle.nn.initializer.Normal( + 0.0, src_emb_dim**-0.5 + ), ), ) @@ -527,7 +529,7 @@ class PrepareEncoderDecoderLayer(Layer): sparse=is_sparse, weight_attr=fluid.ParamAttr( name=pos_enc_param_name, - initializer=fluid.initializer.NumpyArrayInitializer(pos_inp), + initializer=paddle.nn.initializer.Assign(pos_inp), trainable=False, ), ) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py index 2eb0951756..1fff26b20b 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py @@ -39,7 +39,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) conv_pool_2 = fluid.nets.simple_img_conv_pool( @@ -50,7 +50,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) @@ -65,7 +65,7 @@ def cnn_model(data): size=SIZE, activation="softmax", weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) # To cover @RENAMED@GRADIENT @@ -74,7 +74,7 @@ def cnn_model(data): size=SIZE, activation="softmax", weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) predict += predict2 diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py index e094d932d3..a1d8688fd4 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py @@ -39,7 +39,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) conv_pool_2 = fluid.nets.simple_img_conv_pool( @@ -50,7 +50,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) @@ -65,7 +65,7 @@ def cnn_model(data): size=SIZE, 
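
The hunks above and below all apply the same mechanical substitution: the ParamAttr stays unchanged and only the initializer object moves from fluid.initializer to paddle.nn.initializer. A minimal static-graph sketch of the new spelling (the layer sizes and tensor names here are hypothetical, not taken from the patch):

import paddle

paddle.enable_static()
x = paddle.static.data(name="x", shape=[None, 784], dtype="float32")
predict = paddle.static.nn.fc(
    x,
    size=10,
    activation="softmax",
    # paddle.nn.initializer.Constant replaces fluid.initializer.Constant;
    # the keyword is still `value` and defaults to 0.0 when omitted.
    weight_attr=paddle.ParamAttr(
        initializer=paddle.nn.initializer.Constant(value=0.01)
    ),
)

The same ParamAttr object can be passed to param_attr of fluid.nets.simple_img_conv_pool, which is why the test networks in these files only need the namespace swap.
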
activation="softmax", weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) # To cover @RENAMED@GRADIENT @@ -74,7 +74,7 @@ def cnn_model(data): size=SIZE, activation="softmax", weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) predict += predict2 diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_one_device.py b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_one_device.py index 7e442f1914..74c3c1a726 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_one_device.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_one_device.py @@ -39,7 +39,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) conv_pool_2 = fluid.nets.simple_img_conv_pool( @@ -50,7 +50,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) @@ -64,7 +64,7 @@ def cnn_model(data): size=SIZE, activation="softmax", weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) return predict diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_col.py b/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_col.py index 7f247abc6d..035a174775 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_col.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_col.py @@ -33,11 +33,9 @@ OUT_SIZE = 2 * MODEL_PARALLEL_SIZE def get_param_attr(weight, bias): weight_attr = paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(weight) - ) - bias_attr = paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(bias) + initializer=paddle.nn.initializer.Assign(weight) ) + bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(bias)) return weight_attr, bias_attr diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_row.py b/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_row.py index b63e2065f4..a480993e8e 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_row.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_row.py @@ -33,11 +33,9 @@ OUT_SIZE = 2 * MODEL_PARALLEL_SIZE def get_param_attr(weight, bias): weight_attr = paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(weight) - ) - bias_attr = paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(bias) + initializer=paddle.nn.initializer.Assign(weight) ) + bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(bias)) return weight_attr, bias_attr @@ -65,7 +63,7 @@ def create_model(data, rank): data, size=OUT_SIZE, weight_attr=paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(np_weight) + initializer=paddle.nn.initializer.Assign(np_weight) ), bias_attr=bias_attr, ) diff --git 
a/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_embedding.py b/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_embedding.py index 914ee0852a..689b068f02 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_embedding.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_embedding.py @@ -44,9 +44,7 @@ def create_model(data, rank): axis=0, num_partitions=MODEL_PARALLEL_SIZE, weight_attr=paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - np_weight_part - ) + initializer=paddle.nn.initializer.Assign(np_weight_part) ), bias_attr=False, ) @@ -55,7 +53,7 @@ def create_model(data, rank): data, size=OUT_SIZE, weight_attr=paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(np_weight) + initializer=paddle.nn.initializer.Assign(np_weight) ), bias_attr=False, ) diff --git a/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_dpppmp.py b/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_dpppmp.py index de839e2c5e..ec864a1e40 100644 --- a/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_dpppmp.py +++ b/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_dpppmp.py @@ -35,7 +35,7 @@ def weight_init(mp, shape, col=True, seed=1024): else: step = shape[0] // mp.nranks _w = w[mp.rank * step : mp.rank * step + step, :] - return paddle.fluid.initializer.NumpyArrayInitializer(_w) + return paddle.nn.initializer.Assign(_w) class Criterion(nn.Layer): diff --git a/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_fp16.py b/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_fp16.py index 160ed85cc9..3f95272091 100644 --- a/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_fp16.py +++ b/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_fp16.py @@ -35,7 +35,7 @@ def weight_init(mp, shape, col=True, seed=1024): else: step = shape[0] // mp.nranks _w = w[mp.rank * step : mp.rank * step + step, :] - return paddle.fluid.initializer.NumpyArrayInitializer(_w) + return paddle.nn.initializer.Assign(_w) class Criterion(nn.Layer): diff --git a/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_recompute.py b/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_recompute.py index 31daee3262..af2b1b616d 100644 --- a/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_recompute.py +++ b/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_recompute.py @@ -36,7 +36,7 @@ def weight_init(mp, shape, col=True, seed=1024): else: step = shape[0] // mp.nranks _w = w[mp.rank * step : mp.rank * step + step, :] - return paddle.fluid.initializer.NumpyArrayInitializer(_w) + return paddle.nn.initializer.Assign(_w) class Criterion(nn.Layer): diff --git a/python/paddle/fluid/tests/unittests/collective/parallel_embedding_api.py b/python/paddle/fluid/tests/unittests/collective/parallel_embedding_api.py index 9dd3bade93..f89643e7bf 100644 --- a/python/paddle/fluid/tests/unittests/collective/parallel_embedding_api.py +++ b/python/paddle/fluid/tests/unittests/collective/parallel_embedding_api.py @@ -42,13 +42,13 @@ class TestParallelEmbeddingAPI(TestCollectiveAPIRunnerBase): per_part_size = size[0] // 2 if rank == 0: param_attr = paddle.fluid.ParamAttr( - initializer=paddle.fluid.initializer.NumpyArrayInitializer( + 
initializer=paddle.nn.initializer.Assign( np_array[0:per_part_size, :] ), ) else: param_attr = paddle.fluid.ParamAttr( - initializer=paddle.fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( np_array[per_part_size : size[0], :] ), ) diff --git a/python/paddle/fluid/tests/unittests/collective/row_parallel_linear_api.py b/python/paddle/fluid/tests/unittests/collective/row_parallel_linear_api.py index afbb3f3334..6c3817da5a 100644 --- a/python/paddle/fluid/tests/unittests/collective/row_parallel_linear_api.py +++ b/python/paddle/fluid/tests/unittests/collective/row_parallel_linear_api.py @@ -39,13 +39,13 @@ class TestRowParallelLinearAPI(TestCollectiveAPIRunnerBase): data = paddle.split(data, 2, axis=1)[rank] if rank == 0: param_attr = paddle.fluid.ParamAttr( - initializer=paddle.fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( np_array[0:500, :] ), ) else: param_attr = paddle.fluid.ParamAttr( - initializer=paddle.fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( np_array[500:1000, :] ), ) diff --git a/python/paddle/fluid/tests/unittests/dist_allreduce_op.py b/python/paddle/fluid/tests/unittests/dist_allreduce_op.py index 30bcea4cb5..044c6d78ca 100644 --- a/python/paddle/fluid/tests/unittests/dist_allreduce_op.py +++ b/python/paddle/fluid/tests/unittests/dist_allreduce_op.py @@ -38,7 +38,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) conv_pool_2 = fluid.nets.simple_img_conv_pool( @@ -49,7 +49,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) @@ -63,7 +63,7 @@ def cnn_model(data): size=SIZE, activation="softmax", weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) return predict diff --git a/python/paddle/fluid/tests/unittests/dist_ctr.py b/python/paddle/fluid/tests/unittests/dist_ctr.py index deb4cb921c..dc9bd59df5 100644 --- a/python/paddle/fluid/tests/unittests/dist_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_ctr.py @@ -60,7 +60,7 @@ class TestDistCTR2x2(TestDistRunnerBase): size=[dnn_input_dim, dnn_layer_dims[0]], param_attr=fluid.ParamAttr( name="deep_embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=IS_SPARSE, ) @@ -74,7 +74,7 @@ class TestDistCTR2x2(TestDistRunnerBase): size=dim, activation="relu", weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), name='dnn-fc-%d' % i, ) @@ -87,7 +87,7 @@ class TestDistCTR2x2(TestDistRunnerBase): size=[lr_input_dim, 1], param_attr=fluid.ParamAttr( name="wide_embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=IS_SPARSE, ) diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py index 8e9341f9c5..527ba34bae 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py @@ -107,7 +107,7 @@ class TestDistCTR2x2(FleetDistRunnerBase): size=[dnn_input_dim, 
dnn_layer_dims[0]], param_attr=fluid.ParamAttr( name="deep_embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=True, padding_idx=0, @@ -122,7 +122,7 @@ class TestDistCTR2x2(FleetDistRunnerBase): size=dim, activation="relu", weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), name='dnn-fc-%d' % i, ) @@ -135,7 +135,7 @@ class TestDistCTR2x2(FleetDistRunnerBase): size=[lr_input_dim, 1], param_attr=fluid.ParamAttr( name="wide_embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=True, padding_idx=0, diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py index 3e71a1cb60..de0f32e311 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py @@ -78,7 +78,7 @@ class TestHeterPipelinePsCTR2x2(FleetDistHeterRunnerBase): size=[dnn_input_dim, dnn_layer_dims[0]], param_attr=fluid.ParamAttr( name="deep_embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=True, ) @@ -94,7 +94,7 @@ class TestHeterPipelinePsCTR2x2(FleetDistHeterRunnerBase): size=[lr_input_dim, 1], param_attr=fluid.ParamAttr( name="wide_embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=True, ) @@ -109,7 +109,7 @@ class TestHeterPipelinePsCTR2x2(FleetDistHeterRunnerBase): size=dim, activation="relu", weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), name='dnn-fc-%d' % i, ) diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py index dc0a7022b3..453b715b50 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py @@ -40,7 +40,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) conv_pool_2 = fluid.nets.simple_img_conv_pool( @@ -51,7 +51,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) @@ -65,7 +65,7 @@ def cnn_model(data): size=SIZE, activation="softmax", weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) return predict diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py index ac1a4c632f..25f8663c74 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py @@ -40,7 +40,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - 
initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) conv_pool_2 = fluid.nets.simple_img_conv_pool( @@ -51,7 +51,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) @@ -65,7 +65,7 @@ def cnn_model(data): size=SIZE, activation="softmax", weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) return predict diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py index b673bfeae1..bd4fc90fd2 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py @@ -124,7 +124,8 @@ def train_network( is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), name="__emb__" + initializer=paddle.nn.initializer.Constant(value=0.01), + name="__emb__", ), is_sparse=is_sparse, ) @@ -137,7 +138,7 @@ def train_network( x=q_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", learning_rate=base_lr, ), @@ -149,7 +150,7 @@ def train_network( is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -164,7 +165,8 @@ def train_network( x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), name="__fc__" + initializer=paddle.nn.initializer.Constant(value=0.01), + name="__fc__", ), bias_attr=fluid.ParamAttr(name="__fc_b__"), ) @@ -175,7 +177,8 @@ def train_network( is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), name="__emb__" + initializer=paddle.nn.initializer.Constant(value=0.01), + name="__emb__", ), is_sparse=is_sparse, ) @@ -188,7 +191,8 @@ def train_network( x=nt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), name="__fc__" + initializer=paddle.nn.initializer.Constant(value=0.01), + name="__fc__", ), bias_attr=fluid.ParamAttr(name="__fc_b__"), ) diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py index a9a2d7be0b..1780e7dfe2 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py @@ -86,11 +86,11 @@ class TestDistCTR2x2(FleetDistRunnerBase): inference = bool(int(os.getenv("INFERENCE", "0"))) if initializer == 0: - init = fluid.initializer.Constant(value=0.01) + init = paddle.nn.initializer.Constant(value=0.01) elif initializer == 1: - init = fluid.initializer.Uniform() + init = paddle.nn.initializer.Uniform() elif initializer == 2: - init = fluid.initializer.Normal() + init = paddle.nn.initializer.Normal() else: raise ValueError("error initializer code: {}".format(initializer)) @@ -113,7 +113,7 @@ class TestDistCTR2x2(FleetDistRunnerBase): size=dim, activation="relu", 
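
dist_fleet_sparse_embedding_ctr.py above also covers the non-constant initializers: fluid.initializer.Uniform and fluid.initializer.Normal map one-to-one onto paddle.nn.initializer.Uniform and paddle.nn.initializer.Normal. A small sketch of that selection logic as it reads after the migration (the embedding shape is made up for illustration; the defaults noted in the comments are those of the paddle.nn.initializer classes):

import paddle

def pick_initializer(code):
    if code == 0:
        return paddle.nn.initializer.Constant(value=0.01)
    elif code == 1:
        # Uniform() defaults to low=-1.0, high=1.0
        return paddle.nn.initializer.Uniform()
    elif code == 2:
        # Normal() defaults to mean=0.0, std=1.0
        return paddle.nn.initializer.Normal()
    raise ValueError("error initializer code: {}".format(code))

emb = paddle.nn.Embedding(
    1000, 64, weight_attr=paddle.ParamAttr(initializer=pick_initializer(1))
)
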
weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), name='dnn-fc-%d' % i, ) @@ -127,7 +127,7 @@ class TestDistCTR2x2(FleetDistRunnerBase): entry=entry, param_attr=fluid.ParamAttr( name="wide_embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), ) diff --git a/python/paddle/fluid/tests/unittests/dist_mnist.py b/python/paddle/fluid/tests/unittests/dist_mnist.py index 87eb22dcea..30c1130e33 100644 --- a/python/paddle/fluid/tests/unittests/dist_mnist.py +++ b/python/paddle/fluid/tests/unittests/dist_mnist.py @@ -39,7 +39,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) conv_pool_2 = fluid.nets.simple_img_conv_pool( @@ -50,7 +50,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) @@ -64,7 +64,7 @@ def cnn_model(data): size=SIZE, activation="softmax", weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) return predict diff --git a/python/paddle/fluid/tests/unittests/dist_se_resnext.py b/python/paddle/fluid/tests/unittests/dist_se_resnext.py index db3318d67d..6482ac53b0 100644 --- a/python/paddle/fluid/tests/unittests/dist_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/dist_se_resnext.py @@ -121,7 +121,7 @@ class SE_ResNeXt: size=class_dim, activation='softmax', weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.05) + initializer=paddle.nn.initializer.Constant(value=0.05) ), ) return out @@ -174,7 +174,7 @@ class SE_ResNeXt: act=None, # avoid pserver CPU init differs from GPU param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.05) + initializer=paddle.nn.initializer.Constant(value=0.05) ), bias_attr=False, ) @@ -187,7 +187,7 @@ class SE_ResNeXt: x=pool, size=num_channels // reduction_ratio, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.05) + initializer=paddle.nn.initializer.Constant(value=0.05) ), activation='relu', ) @@ -196,7 +196,7 @@ class SE_ResNeXt: x=squeeze, size=num_channels, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.05) + initializer=paddle.nn.initializer.Constant(value=0.05) ), activation='sigmoid', ) diff --git a/python/paddle/fluid/tests/unittests/dist_text_classification.py b/python/paddle/fluid/tests/unittests/dist_text_classification.py index a287bd8a6c..d29997ef8a 100644 --- a/python/paddle/fluid/tests/unittests/dist_text_classification.py +++ b/python/paddle/fluid/tests/unittests/dist_text_classification.py @@ -59,7 +59,7 @@ def conv_net( size=[dict_dim, emb_dim], is_sparse=False, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) @@ -70,7 +70,7 @@ def conv_net( act="tanh", pool_type="max", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) @@ -78,7 +78,7 @@ def conv_net( x=[conv_3], size=fc0_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + 
initializer=paddle.nn.initializer.Constant(value=0.01) ), ) @@ -87,7 +87,7 @@ def conv_net( size=class_dim, activation="softmax", weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) diff --git a/python/paddle/fluid/tests/unittests/dist_transformer.py b/python/paddle/fluid/tests/unittests/dist_transformer.py index 5b0343bd81..e9ce91c197 100644 --- a/python/paddle/fluid/tests/unittests/dist_transformer.py +++ b/python/paddle/fluid/tests/unittests/dist_transformer.py @@ -29,7 +29,9 @@ import paddle.fluid as fluid import paddle.fluid.layers as layers import paddle.nn.functional as F -const_para_attr = fluid.ParamAttr(initializer=fluid.initializer.Constant(0.001)) +const_para_attr = fluid.ParamAttr( + initializer=paddle.nn.initializer.Constant(0.001) +) const_bias_attr = const_para_attr # Fix seed for test @@ -1253,8 +1255,8 @@ def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.0): out = layers.layer_norm( out, begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.initializer.Constant(1.0), - bias_attr=fluid.initializer.Constant(0.0), + param_attr=paddle.nn.initializer.Constant(1.0), + bias_attr=paddle.nn.initializer.Constant(0.0), ) elif cmd == "d": # add dropout if dropout_rate: @@ -1292,7 +1294,7 @@ def prepare_encoder( size=[src_vocab_size, src_emb_dim], param_attr=fluid.ParamAttr( name=word_emb_param_name, - initializer=fluid.initializer.ConstantInitializer(0.001), + initializer=paddle.nn.initializer.Constant(0.001), ), ) else: @@ -1301,7 +1303,9 @@ def prepare_encoder( size=[src_vocab_size, src_emb_dim], param_attr=fluid.ParamAttr( name=word_emb_param_name, - initializer=fluid.initializer.Normal(0.0, src_emb_dim**-0.5), + initializer=paddle.nn.initializer.Normal( + 0.0, src_emb_dim**-0.5 + ), ), ) @@ -1312,7 +1316,7 @@ def prepare_encoder( param_attr=fluid.ParamAttr( name=pos_enc_param_name, trainable=False, - initializer=fluid.initializer.ConstantInitializer(0.001), + initializer=paddle.nn.initializer.Constant(0.001), ), ) src_pos_enc.stop_gradient = True diff --git a/python/paddle/fluid/tests/unittests/dist_word2vec.py b/python/paddle/fluid/tests/unittests/dist_word2vec.py index e10131667c..f5de20385f 100644 --- a/python/paddle/fluid/tests/unittests/dist_word2vec.py +++ b/python/paddle/fluid/tests/unittests/dist_word2vec.py @@ -41,7 +41,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase): is_sparse=IS_SPARSE, param_attr=fluid.ParamAttr( name='shared_w', - initializer=fluid.initializer.Constant(value=0.1), + initializer=paddle.nn.initializer.Constant(value=0.1), ), ) embed_second = fluid.layers.embedding( @@ -51,7 +51,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase): is_sparse=IS_SPARSE, param_attr=fluid.ParamAttr( name='shared_w', - initializer=fluid.initializer.Constant(value=0.1), + initializer=paddle.nn.initializer.Constant(value=0.1), ), ) embed_third = fluid.layers.embedding( @@ -61,7 +61,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase): is_sparse=IS_SPARSE, param_attr=fluid.ParamAttr( name='shared_w', - initializer=fluid.initializer.Constant(value=0.1), + initializer=paddle.nn.initializer.Constant(value=0.1), ), ) embed_forth = fluid.layers.embedding( @@ -71,7 +71,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase): is_sparse=IS_SPARSE, param_attr=fluid.ParamAttr( name='shared_w', - initializer=fluid.initializer.Constant(value=0.1), + initializer=paddle.nn.initializer.Constant(value=0.1), ), ) @@ -84,7 +84,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase): 
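
One detail worth noting in the dist_transformer.py hunks above: the old fluid NormalInitializer took loc/scale keywords, while paddle.nn.initializer.Normal takes mean/std, so positional calls such as Normal(0.0, src_emb_dim**-0.5) keep working unchanged, but keyword calls need renaming (the test_cycle_gan.py hunks further down show exactly that rewrite). A short sketch of the scaled embedding initialization in the new spelling, with placeholder vocabulary and embedding sizes:

import paddle

emb_dim = 512
scaled_normal = paddle.nn.initializer.Normal(mean=0.0, std=emb_dim**-0.5)
word_emb = paddle.nn.Embedding(
    30000,
    emb_dim,
    weight_attr=paddle.ParamAttr(name="word_emb", initializer=scaled_normal),
)
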
size=HIDDEN_SIZE, activation='sigmoid', weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1) + initializer=paddle.nn.initializer.Constant(value=0.1) ), ) predict_word = paddle.static.nn.fc( @@ -92,7 +92,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase): size=dict_size, activation='softmax', weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1) + initializer=paddle.nn.initializer.Constant(value=0.1) ), ) cost = paddle.nn.functional.cross_entropy( diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_gradient_merge_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_gradient_merge_pass.py index a99b56974a..9b9d45db08 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_gradient_merge_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_gradient_merge_pass.py @@ -26,7 +26,6 @@ import paddle.nn.functional as F import paddle.static as static import paddle.utils as utils from paddle.distributed.fleet import auto -from paddle.fluid.initializer import NumpyArrayInitializer logging.getLogger().setLevel(logging.INFO) paddle.enable_static() @@ -42,8 +41,12 @@ class MLPLayer(nn.Layer): np.random.seed(2021) arr0 = np.random.normal(0, 0.02, size=(d_model, dim_feedforward)) arr1 = np.random.normal(0, 0.02, size=(dim_feedforward, d_model)) - weight_attr0 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr0)) - weight_attr1 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr1)) + weight_attr0 = paddle.ParamAttr( + initializer=paddle.nn.initializer.Assign(arr0) + ) + weight_attr1 = paddle.ParamAttr( + initializer=paddle.nn.initializer.Assign(arr1) + ) bias_attr = None self.linear0 = nn.Linear( d_model, dim_feedforward, weight_attr0, bias_attr=bias_attr diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py index 1ea69dfbb1..8629a3e185 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py @@ -203,8 +203,8 @@ class BertModelLayer(Layer): self._sent_emb_name = "sent_embedding" self._dtype = "float16" if use_fp16 else "float32" - self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config['initializer_range'] + self._param_initializer = paddle.nn.initializer.TruncatedNormal( + std=config['initializer_range'] ) paddle.set_default_dtype(self._dtype) self._src_emb = paddle.nn.Embedding( @@ -317,8 +317,8 @@ class PretrainModelLayer(Layer): self._prepostprocess_dropout = config['hidden_dropout_prob'] self._word_emb_name = "word_embedding" - self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config['initializer_range'] + self._param_initializer = paddle.nn.initializer.TruncatedNormal( + std=config['initializer_range'] ) self._weight_sharing = weight_sharing self.use_fp16 = use_fp16 @@ -343,7 +343,7 @@ class PretrainModelLayer(Layer): self.mask_lm_out_bias_attr = fluid.ParamAttr( name="mask_lm_out_fc.b_0", - initializer=fluid.initializer.Constant(value=0.0), + initializer=paddle.nn.initializer.Constant(value=0.0), ) if not self._weight_sharing: diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py index 783dfff262..1e7950c29e 100644 --- 
a/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py @@ -41,7 +41,7 @@ class ConvBNLayer(fluid.dygraph.Layer): padding=padding, groups=groups, weight_attr=ParamAttr( - initializer=fluid.initializer.Normal(0.0, 0.02) + initializer=paddle.nn.initializer.Normal(0.0, 0.02) ), bias_attr=False, ) @@ -49,11 +49,11 @@ class ConvBNLayer(fluid.dygraph.Layer): num_channels=ch_out, is_test=is_test, param_attr=ParamAttr( - initializer=fluid.initializer.Normal(0.0, 0.02), + initializer=paddle.nn.initializer.Normal(0.0, 0.02), regularizer=L2Decay(0.0), ), bias_attr=ParamAttr( - initializer=fluid.initializer.Constant(0.0), + initializer=paddle.nn.initializer.Constant(0.0), regularizer=L2Decay(0.0), ), ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py index 99d90a7f1e..88581c023f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py @@ -26,10 +26,8 @@ from paddle.nn import Embedding INF = 1.0 * 1e5 alpha = 0.6 -uniform_initializer = lambda x: fluid.initializer.UniformInitializer( - low=-x, high=x -) -zero_constant = fluid.initializer.Constant(0.0) +uniform_initializer = lambda x: paddle.nn.initializer.Uniform(low=-x, high=x) +zero_constant = paddle.nn.initializer.Constant(0.0) class BasicLSTMUnit(Layer): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py index c76b4dba9c..7f93c83b91 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py @@ -17,9 +17,9 @@ from functools import reduce import paddle import paddle.fluid as fluid import paddle.fluid.param_attr as attr +from paddle.common_ops_import import Variable from paddle.fluid.dygraph import Layer from paddle.jit.api import to_static -from paddle.static import Variable class EmbeddingLayer: @@ -48,7 +48,8 @@ class EmbeddingLayer: sparse=True, padding_idx=self.padding_idx, weight_attr=attr.ParamAttr( - name=self.name, initializer=fluid.initializer.Xavier() + name=self.name, + initializer=paddle.nn.initializer.XavierUniform(), ), ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py index 99fe330c69..d8c5956357 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py @@ -15,7 +15,7 @@ from functools import reduce import paddle -from paddle.static import Variable +from paddle.common_ops_import import Variable class EmbeddingLayer: diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py index 1f589b8d6f..e1aaeabd48 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py @@ -118,10 +118,10 @@ def dyfunc_BilinearTensorProduct(layer1, layer2): 4, 1000, weight_attr=fluid.ParamAttr( - 
initializer=fluid.initializer.Constant(value=0.99) + initializer=paddle.nn.initializer.Constant(value=0.99) ), bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5) + initializer=paddle.nn.initializer.Constant(value=0.5) ), ) @@ -138,10 +138,10 @@ def dyfunc_Conv2D(input): out_channels=2, kernel_size=3, weight_attr=paddle.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99) + initializer=paddle.nn.initializer.Constant(value=0.99) ), bias_attr=paddle.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5) + initializer=paddle.nn.initializer.Constant(value=0.5) ), ) res = conv2d(input) @@ -170,10 +170,10 @@ def dyfunc_Conv2DTranspose(input): 12, 12, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99) + initializer=paddle.nn.initializer.Constant(value=0.99) ), bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5) + initializer=paddle.nn.initializer.Constant(value=0.5) ), ) ret = conv2dTranspose(input) @@ -222,7 +222,7 @@ def dyfunc_Pool2D(input): def dyfunc_Prelu(input): prelu0 = paddle.nn.PReLU( weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(1.0) + initializer=paddle.nn.initializer.Constant(1.0) ), ) res = prelu0(input) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py index a6a9d72812..55a93f769e 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py @@ -115,11 +115,11 @@ class Conv1D(fluid.dygraph.Layer): k = 1.0 / math.sqrt(fan_in) param_attr = ParamAttr( name=prefix + "_w", - initializer=fluid.initializer.Uniform(low=-k, high=k), + initializer=paddle.nn.initializer.Uniform(low=-k, high=k), ) bias_attr = ParamAttr( name=prefix + "_b", - initializer=fluid.initializer.Uniform(low=-k, high=k), + initializer=paddle.nn.initializer.Uniform(low=-k, high=k), ) self._conv2d = paddle.nn.Conv2D( diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py index c14631c35b..59df33e5aa 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py @@ -125,10 +125,10 @@ class MyConvLayer(fluid.dygraph.Layer): out_channels=2, kernel_size=3, weight_attr=paddle.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99) + initializer=paddle.nn.initializer.Constant(value=0.99) ), bias_attr=paddle.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5) + initializer=paddle.nn.initializer.Constant(value=0.5) ), ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py index 0701750e30..b3556f0810 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py @@ -352,7 +352,7 @@ class conv2d(fluid.dygraph.Layer): con_bias_attr = False else: con_bias_attr = fluid.ParamAttr( - initializer=fluid.initializer.Constant(0.0) + initializer=paddle.nn.initializer.Constant(0.0) ) self.conv = paddle.nn.Conv2D( @@ -362,9 +362,7 @@ class conv2d(fluid.dygraph.Layer): stride=stride, padding=padding, weight_attr=paddle.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, 
scale=stddev - ) + initializer=paddle.nn.initializer.Normal(mean=0.0, std=stddev) ), bias_attr=con_bias_attr, ) @@ -378,10 +376,10 @@ class conv2d(fluid.dygraph.Layer): use_global_stats=True, # set True to use deterministic algorithm num_channels=num_filters, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.NormalInitializer(1.0, 0.02) + initializer=paddle.nn.initializer.Normal(1.0, 0.02) ), bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(0.0) + initializer=paddle.nn.initializer.Constant(0.0) ), trainable_statistics=True, ) @@ -421,7 +419,7 @@ class DeConv2D(fluid.dygraph.Layer): de_bias_attr = False else: de_bias_attr = fluid.ParamAttr( - initializer=fluid.initializer.Constant(0.0) + initializer=paddle.nn.initializer.Constant(0.0) ) self._deconv = paddle.nn.Conv2DTranspose( @@ -431,9 +429,7 @@ class DeConv2D(fluid.dygraph.Layer): stride=stride, padding=padding, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=stddev - ) + initializer=paddle.nn.initializer.Normal(mean=0.0, std=stddev) ), bias_attr=de_bias_attr, ) @@ -444,10 +440,10 @@ class DeConv2D(fluid.dygraph.Layer): use_global_stats=True, # set True to use deterministic algorithm num_channels=num_filters, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.NormalInitializer(1.0, 0.02) + initializer=paddle.nn.initializer.Normal(1.0, 0.02) ), bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(0.0) + initializer=paddle.nn.initializer.Constant(0.0) ), trainable_statistics=True, ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py index 94e1dba493..0d108b4040 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py @@ -98,7 +98,7 @@ class BiGRU(fluid.dygraph.Layer): in_features=input_dim, out_features=grnn_hidden_dim * 3, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( + initializer=paddle.nn.initializer.Uniform( low=-init_bound, high=init_bound ), regularizer=fluid.regularizer.L2DecayRegularizer( @@ -111,7 +111,7 @@ class BiGRU(fluid.dygraph.Layer): size=grnn_hidden_dim, h_0=h_0, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( + initializer=paddle.nn.initializer.Uniform( low=-init_bound, high=init_bound ), regularizer=fluid.regularizer.L2DecayRegularizer( @@ -124,7 +124,7 @@ class BiGRU(fluid.dygraph.Layer): in_features=input_dim, out_features=grnn_hidden_dim * 3, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( + initializer=paddle.nn.initializer.Uniform( low=-init_bound, high=init_bound ), regularizer=fluid.regularizer.L2DecayRegularizer( @@ -138,7 +138,7 @@ class BiGRU(fluid.dygraph.Layer): is_reverse=True, h_0=h_0, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( + initializer=paddle.nn.initializer.Uniform( low=-init_bound, high=init_bound ), regularizer=fluid.regularizer.L2DecayRegularizer( @@ -375,7 +375,7 @@ class LexNet(fluid.dygraph.Layer): weight_attr=fluid.ParamAttr( learning_rate=self.emb_lr, name="word_emb", - initializer=fluid.initializer.Uniform( + initializer=paddle.nn.initializer.Uniform( low=-self.init_bound, high=self.init_bound ), ), @@ -415,7 +415,7 @@ class LexNet(fluid.dygraph.Layer): in_features=self.grnn_hidden_dim * 2, out_features=self.num_labels, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( + 
initializer=paddle.nn.initializer.Uniform( low=-self.init_bound, high=self.init_bound ), regularizer=fluid.regularizer.L2DecayRegularizer( diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py index d708dc1ead..72f3dd7c33 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py @@ -22,7 +22,6 @@ from predictor_utils import PredictorTools import paddle import paddle.fluid as fluid -from paddle.fluid.initializer import MSRA from paddle.fluid.param_attr import ParamAttr from paddle.jit.api import to_static from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX @@ -61,7 +60,8 @@ class ConvBNLayer(fluid.dygraph.Layer): padding=padding, groups=num_groups, weight_attr=ParamAttr( - initializer=MSRA(), name=self.full_name() + "_weights" + initializer=paddle.nn.initializer.KaimingUniform(), + name=self.full_name() + "_weights", ), bias_attr=False, ) @@ -259,7 +259,8 @@ class MobileNetV1(fluid.dygraph.Layer): int(1024 * scale), class_dim, weight_attr=ParamAttr( - initializer=MSRA(), name=self.full_name() + "fc7_weights" + initializer=paddle.nn.initializer.KaimingUniform(), + name=self.full_name() + "fc7_weights", ), bias_attr=ParamAttr(name="fc7_offset"), ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py index 53687ca6c1..1099f2dad6 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py @@ -49,26 +49,26 @@ class SimpleLSTMRNN(fluid.Layer): for i in range(self._num_layers): weight_1 = self.create_parameter( attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ) ), shape=[self._hidden_size * 2, self._hidden_size * 4], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ), ) self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1)) bias_1 = self.create_parameter( attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ) ), shape=[self._hidden_size * 4], dtype="float32", - default_initializer=fluid.initializer.Constant(0.0), + default_initializer=paddle.nn.initializer.Constant(0.0), ) self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1)) @@ -158,7 +158,7 @@ class PtbModel(fluid.Layer): sparse=False, weight_attr=fluid.ParamAttr( name='embedding_para', - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ), ), @@ -167,7 +167,7 @@ class PtbModel(fluid.Layer): attr=fluid.ParamAttr(), shape=[self.hidden_size, self.vocab_size], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) @@ -175,7 +175,7 @@ class PtbModel(fluid.Layer): attr=fluid.ParamAttr(), shape=[self.vocab_size], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( 
low=-self.init_scale, high=self.init_scale ), ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py index 911ca2ec90..407e11349c 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py @@ -194,7 +194,7 @@ class ResNet(fluid.dygraph.Layer): self.pool2d_avg_output, class_dim, weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv) + initializer=paddle.nn.initializer.Uniform(-stdv, stdv) ), ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py index e01b77af76..723a7c742c 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py @@ -131,7 +131,7 @@ class SqueezeExcitation(fluid.dygraph.Layer): num_channels, num_channels // reduction_ratio, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv) + initializer=paddle.nn.initializer.Uniform(-stdv, stdv) ), ) stdv = 1.0 / math.sqrt(num_channels / 16.0 * 1.0) @@ -139,7 +139,7 @@ class SqueezeExcitation(fluid.dygraph.Layer): num_channels // reduction_ratio, num_channels, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv) + initializer=paddle.nn.initializer.Uniform(-stdv, stdv) ), ) @@ -316,7 +316,7 @@ class SeResNeXt(fluid.dygraph.Layer): self.pool2d_avg_output, class_dim, weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv) + initializer=paddle.nn.initializer.Uniform(-stdv, stdv) ), ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py index 5601325659..5eb7cfc108 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py @@ -230,7 +230,7 @@ class SkipGram(fluid.dygraph.Layer): self.embedding_size, weight_attr=fluid.ParamAttr( name='embedding_para', - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-0.5 / self.embedding_size, high=0.5 / self.embedding_size, ), @@ -242,7 +242,7 @@ class SkipGram(fluid.dygraph.Layer): self.embedding_size, weight_attr=fluid.ParamAttr( name='embedding_out_para', - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-0.5 / self.embedding_size, high=0.5 / self.embedding_size, ), diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py index 18afc4a4ab..3928c715a6 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py @@ -62,10 +62,10 @@ class PrePostProcessLayer(Layer): paddle.nn.LayerNorm( normalized_shape=d_model, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(1.0) + initializer=paddle.nn.initializer.Constant(1.0) ), bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(0.0) + initializer=paddle.nn.initializer.Constant(0.0) ), ), ) @@ -295,7 +295,7 @@ class Embedder(Layer): vocab_size, emb_dim, 
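
The transformer_dygraph_model.py hunks that follow replace fluid.initializer.NumpyArrayInitializer with paddle.nn.initializer.Assign, which accepts a numpy array (as well as a list or Tensor) and is paired with trainable=False for the frozen position-encoding table. A stand-alone sketch of that pattern, using random placeholder data rather than the real position_encoding_init output:

import numpy as np
import paddle

max_length, emb_dim = 64, 32
pos_table = np.random.uniform(-0.1, 0.1, (max_length, emb_dim)).astype("float32")
pos_emb = paddle.nn.Embedding(
    max_length,
    emb_dim,
    weight_attr=paddle.ParamAttr(
        initializer=paddle.nn.initializer.Assign(pos_table),
        trainable=False,  # keep the table fixed, as in the hunks below
    ),
)
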
weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Normal(0.0, emb_dim**-0.5) + initializer=paddle.nn.initializer.Normal(0.0, emb_dim**-0.5) ), ) @@ -330,7 +330,7 @@ class WrapEncoder(Layer): max_length, self.emb_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( position_encoding_init(max_length, self.emb_dim) ), trainable=False, @@ -522,7 +522,7 @@ class WrapDecoder(Layer): max_length, self.emb_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( position_encoding_init(max_length, self.emb_dim) ), trainable=False, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py index 10df42faa2..dbfc43cfc2 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py @@ -253,10 +253,10 @@ class YOLOv3(fluid.dygraph.Layer): stride=1, padding=0, weight_attr=ParamAttr( - initializer=fluid.initializer.Normal(0.0, 0.02) + initializer=paddle.nn.initializer.Normal(0.0, 0.02) ), bias_attr=ParamAttr( - initializer=fluid.initializer.Constant(0.0), + initializer=paddle.nn.initializer.Constant(0.0), regularizer=L2Decay(0.0), ), ), diff --git a/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py b/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py index 917beec752..895f71c485 100644 --- a/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py +++ b/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py @@ -70,7 +70,7 @@ def net(batch_size=4, lr=0.01): size=[dnn_input_dim, dnn_layer_dims[0]], param_attr=fluid.ParamAttr( name="deep_embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=True, ) @@ -86,7 +86,7 @@ def net(batch_size=4, lr=0.01): size=[lr_input_dim, 1], param_attr=fluid.ParamAttr( name="wide_embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=True, ) @@ -99,7 +99,7 @@ def net(batch_size=4, lr=0.01): size=dim, activation="relu", weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), name='dnn-fc-%d' % i, ) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/program_config.py b/python/paddle/fluid/tests/unittests/ir/inference/program_config.py index d48091f6c1..1d2b442d2d 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/program_config.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/program_config.py @@ -27,7 +27,6 @@ from paddle.fluid.framework import ( Operator, convert_np_dtype_to_dtype_, ) -from paddle.fluid.initializer import NumpyArrayInitializer from paddle.static.quantization import ( QuantizationFreezePass, QuantizationTransformPass, @@ -305,7 +304,7 @@ def create_fake_model(program_config): shape=tensor_config.shape, type=core.VarDesc.VarType.LOD_TENSOR, name=name, - initializer=NumpyArrayInitializer(tensor_config.data), + initializer=paddle.nn.initializer.Assign(tensor_config.data), ) in_vars = [] for name in sorted(save_var_map.keys()): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_bias_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_bias_fuse_pass.py index 
1f32de177e..536f6c4d60 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_bias_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_bias_fuse_pass.py @@ -30,7 +30,7 @@ class ConvBiasMkldnnFusePassSamePadTest(InferencePassTest): name="data", shape=[-1, 3, 100, 100], dtype="float32" ) param_attr = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) conv_out = paddle.static.nn.conv2d( @@ -63,7 +63,7 @@ class ConvBiasMkldnnFusePassValidPadTest(ConvBiasMkldnnFusePassSamePadTest): name="data", shape=[-1, 3, 100, 100], dtype="float32" ) param_attr = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) conv_out = paddle.static.nn.conv2d( @@ -89,7 +89,7 @@ class ConvBiasMkldnnFusePassExplictPadTest(ConvBiasMkldnnFusePassSamePadTest): name="data", shape=[-1, 3, 100, 100], dtype="float32" ) param_attr = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) conv_out = paddle.static.nn.conv2d( @@ -114,7 +114,7 @@ class ConvBiasMkldnnFusePassGroupTest(ConvBiasMkldnnFusePassSamePadTest): name="data", shape=[-1, 3, 100, 100], dtype="float32" ) param_attr = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierUniform(), learning_rate=0.001, ) conv_out = paddle.static.nn.conv2d( @@ -145,7 +145,7 @@ class ConvBiasMkldnnFusePassDialtionsGroupsTest( name="data", shape=[-1, 3, 100, 100], dtype="float32" ) param_attr = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) conv_out = paddle.static.nn.conv2d( @@ -173,7 +173,7 @@ class ConvTransposeMkldnnFusePassDialtionsGroupsTest(InferencePassTest): with fluid.program_guard(self.main_program, self.startup_program): data = fluid.data(name="data", shape=[-1, 3, 5, 5], dtype="float32") param_attr = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) conv_out = paddle.static.nn.conv2d_transpose( diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py index da9a86725c..0e7eb56da9 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py @@ -180,11 +180,11 @@ class TensorRTSubgraphPassInstanceNormTest(InferencePassTest): ) param_attr = fluid.ParamAttr( name='instance_norm_w', - initializer=fluid.initializer.Constant(value=1.0), + initializer=paddle.nn.initializer.Constant(value=1.0), ) bias_attr = fluid.ParamAttr( name='instance_norm_b', - initializer=fluid.initializer.Constant(value=0.0), + initializer=paddle.nn.initializer.Constant(value=0.0), ) out = paddle.static.nn.instance_norm( input=data, param_attr=param_attr, bias_attr=bias_attr diff --git a/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu_v2.py b/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu_v2.py index 17672d668d..590ebbf63e 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu_v2.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu_v2.py @@ 
-114,11 +114,11 @@ class TestBatchNorm(unittest.TestCase): shape[1], is_test=is_test, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(1.0), + initializer=paddle.nn.initializer.Constant(1.0), trainable=False, ), bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(0.0), + initializer=paddle.nn.initializer.Constant(0.0), trainable=False, ), trainable_statistics=trainable_statistics, @@ -262,7 +262,7 @@ class TestBatchNormUseGlobalStats(unittest.TestCase): net1 = paddle.nn.BatchNorm( 6, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(1.0) + initializer=paddle.nn.initializer.Constant(1.0) ), use_global_stats=self.use_global_stats, trainable_statistics=self.trainable_statistics, diff --git a/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py index 5bf239b5bc..646466e950 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py @@ -322,12 +322,12 @@ class TestNetWithEpsilonTensor(unittest.TestCase): label_np = np.random.randint(2, size=(2, 1)).astype('int64') weight_attr1 = paddle.ParamAttr( name="weight1", - initializer=fluid.initializer.Constant(value=1.0), + initializer=paddle.nn.initializer.Constant(value=1.0), trainable=True, ) weight_attr2 = paddle.ParamAttr( name="weight2", - initializer=fluid.initializer.Constant(value=2.0), + initializer=paddle.nn.initializer.Constant(value=2.0), trainable=True, ) clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0) diff --git a/python/paddle/fluid/tests/unittests/npu/test_run_program_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_run_program_op_npu.py index 69769bbdc1..5369f4d410 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_run_program_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_run_program_op_npu.py @@ -298,7 +298,7 @@ class TestRunProgramOpWithFC(RunProgramNPUOpTest): weight_attr = fluid.ParamAttr( name=self.input_names['Params'][0], learning_rate=0.5, - initializer=fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( self.inputs['Params'][self.input_names['Params'][0]] ), trainable=True, @@ -306,7 +306,7 @@ class TestRunProgramOpWithFC(RunProgramNPUOpTest): bias_attr = fluid.ParamAttr( name=self.input_names['Params'][1], learning_rate=0.5, - initializer=fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( self.inputs['Params'][self.input_names['Params'][1]] ), trainable=True, diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py index 11aacd0243..96b0b734a1 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py @@ -41,7 +41,7 @@ class SimpleNet(fluid.Layer): self.hidden_size, sparse=is_sparse, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ) ), @@ -50,7 +50,7 @@ class SimpleNet(fluid.Layer): attr=fluid.ParamAttr(), shape=[self.hidden_size, self.vocab_size], dtype=dtype, - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) @@ -58,7 +58,7 @@ class SimpleNet(fluid.Layer): attr=fluid.ParamAttr(), 
shape=[self.vocab_size], dtype=dtype, - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) diff --git a/python/paddle/fluid/tests/unittests/simple_nets.py b/python/paddle/fluid/tests/unittests/simple_nets.py index a3ff2b6865..cf4372818b 100644 --- a/python/paddle/fluid/tests/unittests/simple_nets.py +++ b/python/paddle/fluid/tests/unittests/simple_nets.py @@ -26,7 +26,7 @@ def simple_fc_net_with_inputs(img, label, class_num=10): size=100, activation='relu', bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ), ) prediction = paddle.static.nn.fc( @@ -53,7 +53,7 @@ def batchnorm_fc_with_inputs(img, label, class_num=10): size=200, activation='relu', bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ), ) diff --git a/python/paddle/fluid/tests/unittests/static_model_parallel_fused_attention.py b/python/paddle/fluid/tests/unittests/static_model_parallel_fused_attention.py index d018c52506..37048d7cd2 100644 --- a/python/paddle/fluid/tests/unittests/static_model_parallel_fused_attention.py +++ b/python/paddle/fluid/tests/unittests/static_model_parallel_fused_attention.py @@ -25,11 +25,9 @@ paddle.enable_static() def get_param_attr(weight, bias): weight_attr = paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(weight) - ) - bias_attr = paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(bias) + initializer=paddle.nn.initializer.Assign(weight) ) + bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(bias)) return weight_attr, bias_attr diff --git a/python/paddle/fluid/tests/unittests/static_model_parallel_fused_feedforward.py b/python/paddle/fluid/tests/unittests/static_model_parallel_fused_feedforward.py index 89cd0453d7..4fca47635a 100644 --- a/python/paddle/fluid/tests/unittests/static_model_parallel_fused_feedforward.py +++ b/python/paddle/fluid/tests/unittests/static_model_parallel_fused_feedforward.py @@ -30,11 +30,9 @@ OUT_SIZE = 2 * MODEL_PARALLEL_SIZE def get_param_attr(weight, bias): weight_attr = paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(weight) - ) - bias_attr = paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(bias) + initializer=paddle.nn.initializer.Assign(weight) ) + bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(bias)) return weight_attr, bias_attr diff --git a/python/paddle/fluid/tests/unittests/static_model_parallel_fused_multi_transformer.py b/python/paddle/fluid/tests/unittests/static_model_parallel_fused_multi_transformer.py index 047bd3ae8a..9c863d6d3b 100644 --- a/python/paddle/fluid/tests/unittests/static_model_parallel_fused_multi_transformer.py +++ b/python/paddle/fluid/tests/unittests/static_model_parallel_fused_multi_transformer.py @@ -25,11 +25,9 @@ paddle.enable_static() def get_param_attr(weight, bias): weight_attr = paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(weight) - ) - bias_attr = paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(bias) + initializer=paddle.nn.initializer.Assign(weight) ) + bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(bias)) return weight_attr, bias_attr diff --git a/python/paddle/fluid/tests/unittests/test_adam_op.py b/python/paddle/fluid/tests/unittests/test_adam_op.py 
index d7052c9472..2983e5ca19 100644 --- a/python/paddle/fluid/tests/unittests/test_adam_op.py +++ b/python/paddle/fluid/tests/unittests/test_adam_op.py @@ -767,12 +767,12 @@ class TestAdamOptimizer(unittest.TestCase): label_np = np.random.randint(2, size=(2, 1)).astype('int64') weight_attr1 = paddle.ParamAttr( name="weight1", - initializer=fluid.initializer.Constant(value=1.0), + initializer=paddle.nn.initializer.Constant(value=1.0), trainable=True, ) weight_attr2 = paddle.ParamAttr( name="weight2", - initializer=fluid.initializer.Constant(value=2.0), + initializer=paddle.nn.initializer.Constant(value=2.0), trainable=True, ) clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0) @@ -926,7 +926,7 @@ class TestAdamOptimizer(unittest.TestCase): main = fluid.Program() weight_attr = paddle.ParamAttr( name="weight1", - initializer=fluid.initializer.Constant(value=1.0), + initializer=paddle.nn.initializer.Constant(value=1.0), regularizer=fluid.regularizer.L1DecayRegularizer( regularization_coeff=0.1 ), diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py index 25e4ab9aa8..c15f647a38 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py @@ -39,7 +39,6 @@ from paddle.distributed.auto_parallel.partitioner import Partitioner from paddle.distributed.auto_parallel.reshard import Resharder from paddle.distributed.fleet import auto from paddle.fluid import core -from paddle.fluid.initializer import NumpyArrayInitializer if os.getenv("CUDA_VISIBLE_DEVICES") is not None: os.environ["CUDA_VISIBLE_DEVICES"] = "" @@ -373,10 +372,18 @@ class MLPLayer(nn.Layer): arr1 = np.random.normal(0, 0.02, size=(dim_feedforward, d_model)) arr2 = np.random.normal(0, 0.02, size=(d_model, dim_feedforward)) arr3 = np.random.normal(0, 0.02, size=(dim_feedforward, d_model)) - weight_attr0 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr0)) - weight_attr1 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr1)) - weight_attr2 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr2)) - weight_attr3 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr3)) + weight_attr0 = paddle.ParamAttr( + initializer=paddle.nn.initializer.Assign(arr0) + ) + weight_attr1 = paddle.ParamAttr( + initializer=paddle.nn.initializer.Assign(arr1) + ) + weight_attr2 = paddle.ParamAttr( + initializer=paddle.nn.initializer.Assign(arr2) + ) + weight_attr3 = paddle.ParamAttr( + initializer=paddle.nn.initializer.Assign(arr3) + ) bias_attr = None self.linear0 = nn.Linear( d_model, dim_feedforward, weight_attr0, bias_attr=bias_attr diff --git a/python/paddle/fluid/tests/unittests/test_avoid_twice_initialization.py b/python/paddle/fluid/tests/unittests/test_avoid_twice_initialization.py index 77062eee5a..efbf4a538e 100644 --- a/python/paddle/fluid/tests/unittests/test_avoid_twice_initialization.py +++ b/python/paddle/fluid/tests/unittests/test_avoid_twice_initialization.py @@ -14,6 +14,7 @@ import unittest +import paddle import paddle.fluid as fluid @@ -22,7 +23,7 @@ class TestAvoidTwiceInitialization(unittest.TestCase): cur_program = fluid.Program() cur_block = cur_program.current_block() var = cur_block.create_parameter( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), shape=[2, 2], dtype='float32', name='var_a', @@ -40,7 +41,7 @@ class TestAvoidTwiceInitialization(unittest.TestCase): attrs={'ring_id': 
0}, ) var2 = cur_block.create_parameter( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), shape=[2, 2], dtype='float32', name='var_a', diff --git a/python/paddle/fluid/tests/unittests/test_base_layer.py b/python/paddle/fluid/tests/unittests/test_base_layer.py index 05d9b71c1e..7414c3732b 100644 --- a/python/paddle/fluid/tests/unittests/test_base_layer.py +++ b/python/paddle/fluid/tests/unittests/test_base_layer.py @@ -26,7 +26,7 @@ class L1(fluid.Layer): def __init__(self): super().__init__() self._param_attr = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1) + initializer=paddle.nn.initializer.Constant(value=0.1) ) self.w1 = self.create_parameter( attr=self._param_attr, shape=[2, 2], dtype='float32', is_bias=False diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py index ece07889df..d6127ff5dd 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py +++ b/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py @@ -183,11 +183,11 @@ class TestBatchNorm(unittest.TestCase): shape[1], is_test=is_test, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(1.0), + initializer=paddle.nn.initializer.Constant(1.0), trainable=False, ), bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(0.0), + initializer=paddle.nn.initializer.Constant(0.0), trainable=False, ), trainable_statistics=trainable_statistics, @@ -378,7 +378,7 @@ class TestBatchNormUseGlobalStats(unittest.TestCase): net1 = paddle.nn.BatchNorm( 6, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(1.0) + initializer=paddle.nn.initializer.Constant(1.0) ), use_global_stats=self.use_global_stats, trainable_statistics=self.trainable_statistics, diff --git a/python/paddle/fluid/tests/unittests/test_calc_gradient.py b/python/paddle/fluid/tests/unittests/test_calc_gradient.py index d8d20e41aa..38a1284f0a 100644 --- a/python/paddle/fluid/tests/unittests/test_calc_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_calc_gradient.py @@ -48,7 +48,7 @@ class TestDoubleGrad(unittest.TestCase): name='x', shape=[1], dtype='float32', - default_initializer=fluid.initializer.Constant(3), + default_initializer=paddle.nn.initializer.Constant(3), ) (grad1,) = fluid.gradients(net(x), x) # 2x = 6 z = net(x - grad1) @@ -69,7 +69,7 @@ class TestDoubleGrad(unittest.TestCase): name='x', shape=[1], dtype='float32', - default_initializer=fluid.initializer.Constant(1), + default_initializer=paddle.nn.initializer.Constant(1), ) y = x * x (dx1,) = fluid.gradients(y, x) diff --git a/python/paddle/fluid/tests/unittests/test_communicator_geo.py b/python/paddle/fluid/tests/unittests/test_communicator_geo.py index 0e836dca1c..a82c0e023c 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_geo.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_geo.py @@ -41,7 +41,7 @@ class TestCommunicatorGeoEnd2End(unittest.TestCase): size=[10000, 10], param_attr=fluid.ParamAttr( name="embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=True, ) diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_layer.py b/python/paddle/fluid/tests/unittests/test_conv2d_layer.py index edd3d718c4..7cb0a06614 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_layer.py @@ -18,7 
+18,6 @@ import numpy as np import paddle import paddle.fluid.dygraph as dg -import paddle.fluid.initializer as I import paddle.nn.functional as F from paddle import fluid, nn @@ -110,11 +109,11 @@ class Conv2DTestCase(unittest.TestCase): else (-1, self.num_channels, -1, -1) ) x_var = fluid.data("input", input_shape, dtype=self.dtype) - weight_attr = I.NumpyArrayInitializer(self.weight) + weight_attr = paddle.nn.initializer.Assign(self.weight) if self.bias is None: bias_attr = False else: - bias_attr = I.NumpyArrayInitializer(self.bias) + bias_attr = paddle.nn.initializer.Assign(self.bias) if self.padding_mode != 'zeros': x_var = F.pad( x_var, diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py index 9eeb4fc82d..50c80c3aa3 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py @@ -18,7 +18,6 @@ import numpy as np import paddle import paddle.fluid.dygraph as dg -import paddle.fluid.initializer as I import paddle.nn.functional as F from paddle import fluid, nn @@ -101,11 +100,11 @@ class Conv2DTransposeTestCase(unittest.TestCase): else (-1, self.num_channels, -1, -1) ) x_var = fluid.data("input", input_shape, dtype=self.dtype) - weight_attr = I.NumpyArrayInitializer(self.weight) + weight_attr = paddle.nn.initializer.Assign(self.weight) if self.bias is None: bias_attr = False else: - bias_attr = I.NumpyArrayInitializer(self.bias) + bias_attr = paddle.nn.initializer.Assign(self.bias) y_var = paddle.static.nn.conv2d_transpose( x_var, diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_layer.py b/python/paddle/fluid/tests/unittests/test_conv3d_layer.py index 78276fbf76..8ef86daf69 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_layer.py @@ -18,7 +18,6 @@ import numpy as np import paddle import paddle.fluid.dygraph as dg -import paddle.fluid.initializer as I import paddle.nn.functional as F from paddle import fluid, nn @@ -97,11 +96,11 @@ class Conv3DTestCase(unittest.TestCase): else (-1, self.num_channels, -1, -1, -1) ) x_var = fluid.data("input", input_shape, dtype=self.dtype) - weight_attr = I.NumpyArrayInitializer(self.weight) + weight_attr = paddle.nn.initializer.Assign(self.weight) if self.bias is None: bias_attr = False else: - bias_attr = I.NumpyArrayInitializer(self.bias) + bias_attr = paddle.nn.initializer.Assign(self.bias) y_var = paddle.static.nn.conv3d( x_var, self.num_filters, diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py index 1ea071142c..82c08348f4 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py @@ -18,7 +18,6 @@ import numpy as np import paddle import paddle.fluid.dygraph as dg -import paddle.fluid.initializer as I import paddle.nn.functional as F from paddle import fluid, nn @@ -99,11 +98,11 @@ class Conv3DTransposeTestCase(unittest.TestCase): else (-1, self.num_channels, -1, -1, -1) ) x_var = fluid.data("input", input_shape, dtype=self.dtype) - weight_attr = I.NumpyArrayInitializer(self.weight) + weight_attr = paddle.nn.initializer.Assign(self.weight) if self.bias is None: bias_attr = False else: - bias_attr = I.NumpyArrayInitializer(self.bias) + bias_attr = paddle.nn.initializer.Assign(self.bias) 
y_var = paddle.static.nn.conv3d_transpose( x_var, self.num_filters, diff --git a/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py b/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py index 32f77ab290..b7a0c981ba 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py @@ -120,15 +120,15 @@ class TestGeneratorSeed(unittest.TestCase): result_1 = paddle.static.nn.fc( x, size=10, - weight_attr=fluid.initializer.TruncatedNormal( - loc=0.0, scale=2.0 + weight_attr=paddle.nn.initializer.TruncatedNormal( + mean=0.0, std=2.0 ), ) result_2 = paddle.static.nn.fc( x, size=10, - weight_attr=fluid.initializer.TruncatedNormal( - loc=0.0, scale=2.0 + weight_attr=paddle.nn.initializer.TruncatedNormal( + mean=0.0, std=2.0 ), ) diff --git a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py index e02282cb9b..a5f193daa4 100644 --- a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py +++ b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py @@ -61,7 +61,7 @@ def simple_fc_net(places, use_legacy_py_reader, use_double_buffer): size=hidden_size, activation='tanh', bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ), ) diff --git a/python/paddle/fluid/tests/unittests/test_desc_clone.py b/python/paddle/fluid/tests/unittests/test_desc_clone.py index ecb49c3172..2bf68add10 100644 --- a/python/paddle/fluid/tests/unittests/test_desc_clone.py +++ b/python/paddle/fluid/tests/unittests/test_desc_clone.py @@ -58,9 +58,7 @@ def cnn_model(data): size=SIZE, activation="softmax", weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=scale - ) + initializer=paddle.nn.initializer.Normal(mean=0.0, std=scale) ), ) return predict diff --git a/python/paddle/fluid/tests/unittests/test_detach.py b/python/paddle/fluid/tests/unittests/test_detach.py index 1a01b7667f..d3622bd042 100644 --- a/python/paddle/fluid/tests/unittests/test_detach.py +++ b/python/paddle/fluid/tests/unittests/test_detach.py @@ -120,7 +120,7 @@ class Test_Detach(unittest.TestCase): initializer=paddle.nn.initializer.Constant(5.0) ) linear_b_param_attrs = fluid.ParamAttr( - initializer=fluid.initializer.Constant(6.0) + initializer=paddle.nn.initializer.Constant(6.0) ) linear = Linear( 4, @@ -132,7 +132,7 @@ class Test_Detach(unittest.TestCase): initializer=paddle.nn.initializer.Constant(7.0) ) linear1_b_param_attrs = fluid.ParamAttr( - initializer=fluid.initializer.Constant(8.0) + initializer=paddle.nn.initializer.Constant(8.0) ) linear1 = Linear( 10, diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py index c560dfa8db..f0f85e1645 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py @@ -58,7 +58,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): size=[1000000000, 100000], param_attr=paddle.fluid.ParamAttr( name="embedding", - initializer=paddle.fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=True, ) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py
b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py index bc17b0d67f..69b341a026 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py @@ -89,7 +89,7 @@ class TestDistFleetHeterProgram(unittest.TestCase): size=[100001, 10], param_attr=fluid.ParamAttr( name="SparseFeatFactors", - initializer=fluid.initializer.Uniform(), + initializer=paddle.nn.initializer.Uniform(), ), ) @@ -103,8 +103,8 @@ class TestDistFleetHeterProgram(unittest.TestCase): size=400, activation="relu", weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Normal( - scale=1 / math.sqrt(concated.shape[1]) + initializer=paddle.nn.initializer.Normal( + std=1 / math.sqrt(concated.shape[1]) ) ), name="fc1", @@ -116,8 +116,8 @@ class TestDistFleetHeterProgram(unittest.TestCase): size=400, activation="relu", weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Normal( - scale=1 / math.sqrt(fc1.shape[1]) + initializer=paddle.nn.initializer.Normal( + std=1 / math.sqrt(fc1.shape[1]) ) ), name="fc2", @@ -129,8 +129,8 @@ class TestDistFleetHeterProgram(unittest.TestCase): size=400, activation="relu", weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Normal( - scale=1 / math.sqrt(fc2.shape[1]) + initializer=paddle.nn.initializer.Normal( + std=1 / math.sqrt(fc2.shape[1]) ) ), name="fc3", @@ -142,8 +142,8 @@ class TestDistFleetHeterProgram(unittest.TestCase): size=2, activation="softmax", weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Normal( - scale=1 / math.sqrt(fc3.shape[1]) + initializer=paddle.nn.initializer.Normal( + std=1 / math.sqrt(fc3.shape[1]) ) ), ) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_minimize.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_minimize.py index b60ff0db63..da63b75f50 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_minimize.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_minimize.py @@ -77,7 +77,7 @@ class TestPSMinimize(unittest.TestCase): input=q, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -91,7 +91,7 @@ class TestPSMinimize(unittest.TestCase): x=q_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", learning_rate=base_lr, ), @@ -107,7 +107,7 @@ class TestPSMinimize(unittest.TestCase): input=pt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -121,7 +121,7 @@ class TestPSMinimize(unittest.TestCase): x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), @@ -136,7 +136,7 @@ class TestPSMinimize(unittest.TestCase): input=nt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -150,7 +150,7 @@ class TestPSMinimize(unittest.TestCase): x=nt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + 
initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py index a330b45b52..ea30485e5a 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py @@ -78,7 +78,7 @@ class TestPSPassWithBow(unittest.TestCase): is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -93,7 +93,7 @@ class TestPSPassWithBow(unittest.TestCase): x=q_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", learning_rate=base_lr, ), @@ -110,7 +110,7 @@ class TestPSPassWithBow(unittest.TestCase): is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -125,7 +125,7 @@ class TestPSPassWithBow(unittest.TestCase): x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), @@ -141,7 +141,7 @@ class TestPSPassWithBow(unittest.TestCase): is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -156,7 +156,7 @@ class TestPSPassWithBow(unittest.TestCase): x=nt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py index 2143dc94d3..861e015568 100755 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py @@ -77,7 +77,7 @@ class TestPSPassWithBow(unittest.TestCase): input=q, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -91,7 +91,7 @@ class TestPSPassWithBow(unittest.TestCase): x=q_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", learning_rate=base_lr, ), @@ -107,7 +107,7 @@ class TestPSPassWithBow(unittest.TestCase): input=pt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -121,7 +121,7 @@ class TestPSPassWithBow(unittest.TestCase): x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), @@ -136,7 +136,7 @@ class 
TestPSPassWithBow(unittest.TestCase): input=nt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -150,7 +150,7 @@ class TestPSPassWithBow(unittest.TestCase): x=nt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py index bee3cd9eb2..1ab2d51782 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py @@ -80,7 +80,7 @@ class TestPSPassWithBow(unittest.TestCase): input=q, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -94,7 +94,7 @@ class TestPSPassWithBow(unittest.TestCase): x=q_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", learning_rate=base_lr, ), @@ -110,7 +110,7 @@ class TestPSPassWithBow(unittest.TestCase): input=pt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -124,7 +124,7 @@ class TestPSPassWithBow(unittest.TestCase): x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), @@ -139,7 +139,7 @@ class TestPSPassWithBow(unittest.TestCase): input=nt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -153,7 +153,7 @@ class TestPSPassWithBow(unittest.TestCase): x=nt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py index 58248d325b..b17451098f 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py @@ -81,7 +81,7 @@ class TestPSPassWithBow(unittest.TestCase): input=q, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -95,7 +95,7 @@ class TestPSPassWithBow(unittest.TestCase): x=q_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", learning_rate=base_lr, ), @@ -111,7 +111,7 @@ class TestPSPassWithBow(unittest.TestCase): input=pt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + 
initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -125,7 +125,7 @@ class TestPSPassWithBow(unittest.TestCase): x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), @@ -140,7 +140,7 @@ class TestPSPassWithBow(unittest.TestCase): input=nt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -154,7 +154,7 @@ class TestPSPassWithBow(unittest.TestCase): x=nt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py index e207fb859d..c9e6cb2035 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py @@ -80,7 +80,7 @@ class TestPSPassWithBow(unittest.TestCase): input=q, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -95,7 +95,7 @@ class TestPSPassWithBow(unittest.TestCase): x=q_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", learning_rate=base_lr, ), @@ -111,7 +111,7 @@ class TestPSPassWithBow(unittest.TestCase): input=pt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -125,7 +125,7 @@ class TestPSPassWithBow(unittest.TestCase): x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), @@ -140,7 +140,7 @@ class TestPSPassWithBow(unittest.TestCase): input=nt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -154,7 +154,7 @@ class TestPSPassWithBow(unittest.TestCase): x=nt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py index 4093fc34cc..2a5f845b93 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py @@ -78,7 +78,7 @@ class TestPSPassWithBow(unittest.TestCase): is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -93,7 +93,7 @@ class TestPSPassWithBow(unittest.TestCase): x=q_ss, size=hid_dim, 
weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", learning_rate=base_lr, ), @@ -110,7 +110,7 @@ class TestPSPassWithBow(unittest.TestCase): is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -125,7 +125,7 @@ class TestPSPassWithBow(unittest.TestCase): x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), @@ -141,7 +141,7 @@ class TestPSPassWithBow(unittest.TestCase): is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -156,7 +156,7 @@ class TestPSPassWithBow(unittest.TestCase): x=nt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py index 025b3e90b3..094ea32967 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py @@ -77,7 +77,7 @@ class TestPSPassWithBow(unittest.TestCase): input=q, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -91,7 +91,7 @@ class TestPSPassWithBow(unittest.TestCase): x=q_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", learning_rate=base_lr, ), @@ -107,7 +107,7 @@ class TestPSPassWithBow(unittest.TestCase): input=pt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -121,7 +121,7 @@ class TestPSPassWithBow(unittest.TestCase): x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), @@ -136,7 +136,7 @@ class TestPSPassWithBow(unittest.TestCase): input=nt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -150,7 +150,7 @@ class TestPSPassWithBow(unittest.TestCase): x=nt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py index 51bf54b324..40abc45e0a 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py +++ 
b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py @@ -78,7 +78,7 @@ class TestPSPassWithBow(unittest.TestCase): is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -93,7 +93,7 @@ class TestPSPassWithBow(unittest.TestCase): x=q_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", learning_rate=base_lr, ), @@ -110,7 +110,7 @@ class TestPSPassWithBow(unittest.TestCase): is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -125,7 +125,7 @@ class TestPSPassWithBow(unittest.TestCase): x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), @@ -141,7 +141,7 @@ class TestPSPassWithBow(unittest.TestCase): is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__tmp_", learning_rate=emb_lr, ), @@ -156,7 +156,7 @@ class TestPSPassWithBow(unittest.TestCase): x=nt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py index 165a8b6240..a5811d4e0f 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py @@ -77,7 +77,7 @@ class TestPSPassWithBow(unittest.TestCase): input=q, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -91,7 +91,7 @@ class TestPSPassWithBow(unittest.TestCase): x=q_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", learning_rate=base_lr, ), @@ -107,7 +107,7 @@ class TestPSPassWithBow(unittest.TestCase): input=pt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -121,7 +121,7 @@ class TestPSPassWithBow(unittest.TestCase): x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), @@ -136,7 +136,7 @@ class TestPSPassWithBow(unittest.TestCase): input=nt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -150,7 +150,7 @@ class TestPSPassWithBow(unittest.TestCase): x=nt_ss, size=hid_dim, 
weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py index 517232fa54..fae692f8fd 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py @@ -211,7 +211,7 @@ class TestDistMnistAsync2x2WithGauss(TestFleetBase): datas = [dnn_data, lr_data, label] inference = True - init = fluid.initializer.Uniform() + init = paddle.nn.initializer.Uniform() dnn_layer_dims = [128, 64, 32] dnn_embedding = fluid.contrib.layers.sparse_embedding( @@ -232,7 +232,7 @@ class TestDistMnistAsync2x2WithGauss(TestFleetBase): size=dim, activation="relu", weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), name='dnn-fc-%d' % i, ) @@ -245,7 +245,7 @@ class TestDistMnistAsync2x2WithGauss(TestFleetBase): is_test=inference, param_attr=fluid.ParamAttr( name="wide_embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), ) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_spmt.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_spmt.py index ba6e67a035..ebcbfb9e4c 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_spmt.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_spmt.py @@ -75,7 +75,7 @@ class TestSPMT(unittest.TestCase): input=q, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -89,7 +89,7 @@ class TestSPMT(unittest.TestCase): x=q_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", learning_rate=base_lr, ), @@ -105,7 +105,7 @@ class TestSPMT(unittest.TestCase): input=pt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -119,7 +119,7 @@ class TestSPMT(unittest.TestCase): x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), @@ -134,7 +134,7 @@ class TestSPMT(unittest.TestCase): input=nt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -148,7 +148,7 @@ class TestSPMT(unittest.TestCase): x=nt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), diff --git a/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py b/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py index 368be77fdb..5ab7ad21db 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py +++ 
b/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py @@ -38,9 +38,7 @@ class SparseLoadOp(unittest.TestCase): size=[10, 10], param_attr=fluid.ParamAttr( name="embedding", - initializer=fluid.initializer.NumpyArrayInitializer( - emb_array - ), + initializer=paddle.nn.initializer.Assign(emb_array), ), ) @@ -50,9 +48,7 @@ class SparseLoadOp(unittest.TestCase): activation="relu", weight_attr=fluid.ParamAttr( name='fc', - initializer=fluid.initializer.NumpyArrayInitializer( - fc_array - ), + initializer=paddle.nn.initializer.Assign(fc_array), ), ) loss = paddle.mean(fc1) diff --git a/python/paddle/fluid/tests/unittests/test_dist_train.py b/python/paddle/fluid/tests/unittests/test_dist_train.py index 828b07baf7..548f2bf8a0 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_train.py +++ b/python/paddle/fluid/tests/unittests/test_dist_train.py @@ -87,7 +87,9 @@ class TestSendOp(unittest.TestCase): dtype='float32', name="X", ) - fluid.initializer.Constant(value=1.0)(x, main.global_block()) + paddle.nn.initializer.Constant(value=1.0)( + x, main.global_block() + ) ops._scale(x=x, scale=10.0, out=out_var) self.server_exe = fluid.Executor(place) @@ -108,7 +110,7 @@ class TestSendOp(unittest.TestCase): x = paddle.static.data(shape=[32, 32], dtype='float32', name='X') x.persistable = True - fluid.initializer.Constant(value=2.3)(x, main.global_block()) + paddle.nn.initializer.Constant(value=2.3)(x, main.global_block()) get_var = main.global_block().create_var( name="scale_0.tmp_0", # server side var @@ -116,7 +118,9 @@ class TestSendOp(unittest.TestCase): persistable=False, shape=[32, 32], ) - fluid.initializer.Constant(value=2.3)(get_var, main.global_block()) + paddle.nn.initializer.Constant(value=2.3)( + get_var, main.global_block() + ) # NOTE(zjl): `Send` is async send, which means that the sent # variable would be needed even though `Send` op runs. 
@@ -135,7 +139,7 @@ class TestSendOp(unittest.TestCase): main = fluid.Program() with fluid.program_guard(main): x = paddle.static.data(shape=[32, 32], dtype='float32', name='X') - fluid.initializer.Constant(value=2.3)(x, main.global_block()) + paddle.nn.initializer.Constant(value=2.3)(x, main.global_block()) o = paddle.scale(x=x, scale=10.0) exe = fluid.Executor(place) self.local_out = exe.run(main, fetch_list=[o]) diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py index e9b8f773c7..e79a2f7276 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py @@ -356,7 +356,9 @@ class TestFakeInit(TranspilerTest): size=[dict_size, embedding_size], param_attr=fluid.ParamAttr( name='emb', - initializer=fluid.initializer.Uniform(-init_width, init_width), + initializer=paddle.nn.initializer.Uniform( + -init_width, init_width + ), ), ) @@ -365,7 +367,8 @@ class TestFakeInit(TranspilerTest): is_sparse=True, size=[dict_size, embedding_size], param_attr=fluid.ParamAttr( - name='emb_w', initializer=fluid.initializer.Constant(value=0.0) + name='emb_w', + initializer=paddle.nn.initializer.Constant(value=0.0), ), ) @@ -374,7 +377,8 @@ class TestFakeInit(TranspilerTest): is_sparse=True, size=[dict_size, 1], param_attr=fluid.ParamAttr( - name='emb_b', initializer=fluid.initializer.Constant(value=0.0) + name='emb_b', + initializer=paddle.nn.initializer.Constant(value=0.0), ), ) @@ -1327,7 +1331,7 @@ class TestRemoteNce(TestDistLookupTableBase): shape=[num_total_classes, 10], dtype='float32', name='nce_w', - initializer=fluid.initializer.ConstantInitializer(), + initializer=paddle.nn.initializer.Constant(), ) ) b_param = ( @@ -1337,7 +1341,7 @@ class TestRemoteNce(TestDistLookupTableBase): shape=[num_total_classes, 1], dtype='float32', name='nce_b', - initializer=fluid.initializer.ConstantInitializer(), + initializer=paddle.nn.initializer.Constant(), ) ) @@ -1405,7 +1409,7 @@ class TestRemoteHsigmoid(TestDistLookupTableBase): shape=[num_total_classes, 10], dtype='float32', name='hs_w', - initializer=fluid.initializer.ConstantInitializer(), + initializer=paddle.nn.initializer.Constant(), ) ) b_param = ( @@ -1415,7 +1419,7 @@ class TestRemoteHsigmoid(TestDistLookupTableBase): shape=[3, 1], dtype='float32', name='hs_b', - initializer=fluid.initializer.ConstantInitializer(), + initializer=paddle.nn.initializer.Constant(), ) ) @@ -1424,7 +1428,7 @@ class TestRemoteHsigmoid(TestDistLookupTableBase): is_sparse=is_sparse, size=[3, 3], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Normal( - scale=1 / math.sqrt(num_total_classes) + initializer=paddle.nn.initializer.Normal( + std=1 / math.sqrt(num_total_classes) ) ), diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py index a12a17636b..46977b13d7 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py @@ -41,7 +41,7 @@ def simple_fc_net(): size=200, activation='tanh', bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ), ) prediction = paddle.static.nn.fc(hidden, size=10, activation='softmax') diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py
index 5657eb174c..05df1e96d7 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py @@ -129,7 +129,7 @@ def lm_model( [hidden_size * 2, hidden_size * 4], dtype="float32", name="fc_weight1_" + str(i), - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ), ) @@ -138,7 +138,7 @@ def lm_model( [hidden_size * 4], dtype="float32", name="fc_bias1_" + str(i), - default_initializer=fluid.initializer.Constant(0.0), + default_initializer=paddle.nn.initializer.Constant(0.0), ) bias_arr.append(bias_1) @@ -250,7 +250,7 @@ def lm_model( [hidden_size * 2, hidden_size * 4], dtype="float32", name="fc_weight1_" + str(i), - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ), ) @@ -259,7 +259,7 @@ def lm_model( [hidden_size * 4], dtype="float32", name="fc_bias1_" + str(i), - default_initializer=fluid.initializer.Constant(0.0), + default_initializer=paddle.nn.initializer.Constant(0.0), ) bias_arr.append(bias_1) @@ -368,7 +368,7 @@ def lm_model( is_sparse=False, param_attr=fluid.ParamAttr( name='embedding_para', - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ), ), @@ -406,7 +406,7 @@ def lm_model( [hidden_size, vocab_size], dtype="float32", name="softmax_weight", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ), ) @@ -414,7 +414,7 @@ def lm_model( [vocab_size], dtype="float32", name='softmax_bias', - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ), ) diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py index bd4e088195..d9ce93c913 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py @@ -312,7 +312,7 @@ class EagerDeletionRecurrentOpTest2(EagerDeletionRecurrentOpTest1): size=self.input_dim, weight_attr=ParamAttr( name='W', - initializer=fluid.initializer.ConstantInitializer(1.0), + initializer=paddle.nn.initializer.Constant(1.0), ), bias_attr=False, ) @@ -321,7 +321,7 @@ class EagerDeletionRecurrentOpTest2(EagerDeletionRecurrentOpTest1): size=self.input_dim, weight_attr=ParamAttr( name='U', - initializer=fluid.initializer.ConstantInitializer(0.0), + initializer=paddle.nn.initializer.Constant(0.0), ), bias_attr=False, ) diff --git a/python/paddle/fluid/tests/unittests/test_egr_python_api.py b/python/paddle/fluid/tests/unittests/test_egr_python_api.py index 9f05d354c4..3fb03ac89f 100644 --- a/python/paddle/fluid/tests/unittests/test_egr_python_api.py +++ b/python/paddle/fluid/tests/unittests/test_egr_python_api.py @@ -911,19 +911,19 @@ class EagerParamBaseUsageTestCase(unittest.TestCase): 1, 3, bias_attr=False, - weight_attr=paddle.fluid.initializer.Uniform(), + weight_attr=paddle.nn.initializer.Uniform(), ) linear3 = paddle.nn.Linear( 1, 3, bias_attr=False, - weight_attr=paddle.fluid.initializer.TruncatedNormalInitializer(), + weight_attr=paddle.nn.initializer.TruncatedNormal(), ) linear4 = paddle.nn.Linear( 1, 3, bias_attr=False, - 
weight_attr=paddle.fluid.initializer.MSRAInitializer(), + weight_attr=paddle.nn.initializer.KaimingUniform(), ) res = [ linear1.weight.numpy(), diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv2d.py b/python/paddle/fluid/tests/unittests/test_functional_conv2d.py index ce83ba62ac..00cc6c07aa 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv2d.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv2d.py @@ -19,7 +19,6 @@ import numpy as np import paddle import paddle.fluid.dygraph as dg -import paddle.fluid.initializer as I import paddle.nn.functional as F from paddle import fluid @@ -97,10 +96,10 @@ class TestFunctionalConv2D(TestCase): padding=self.padding, dilation=self.dilation, groups=self.groups, - param_attr=I.NumpyArrayInitializer(self.weight), + param_attr=paddle.nn.initializer.Assign(self.weight), bias_attr=False if self.no_bias - else I.NumpyArrayInitializer(self.bias), + else paddle.nn.initializer.Assign(self.bias), act=self.act, data_format=self.data_format, ) @@ -515,10 +514,10 @@ class TestFunctionalConv2DErrorCase12(TestCase): padding=self.padding, dilation=self.dilation, groups=self.groups, - param_attr=I.NumpyArrayInitializer(self.filter), + param_attr=paddle.nn.initializer.Assign(self.filter), bias_attr=False if self.bias is None - else I.NumpyArrayInitializer(self.bias), + else paddle.nn.initializer.Assign(self.bias), act=None, data_format=self.data_format, ) diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py b/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py index f45cf48afb..2981748cf6 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py @@ -19,7 +19,6 @@ import numpy as np import paddle import paddle.fluid.dygraph as dg -import paddle.fluid.initializer as I import paddle.nn.functional as F from paddle import fluid @@ -99,10 +98,10 @@ class TestFunctionalConv2D(TestCase): padding=self.padding, dilation=self.dilation, groups=self.groups, - param_attr=I.NumpyArrayInitializer(self.weight), + param_attr=paddle.nn.initializer.Assign(self.weight), bias_attr=False if self.no_bias - else I.NumpyArrayInitializer(self.bias), + else paddle.nn.initializer.Assign(self.bias), data_format=self.data_format, ) exe = fluid.Executor(self.place) @@ -523,10 +522,10 @@ class TestFunctionalConv2DErrorCase10(TestCase): padding=self.padding, dilation=self.dilation, groups=self.groups, - param_attr=I.NumpyArrayInitializer(self.filter), + param_attr=paddle.nn.initializer.Assign(self.filter), bias_attr=False if self.bias is None - else I.NumpyArrayInitializer(self.bias), + else paddle.nn.initializer.Assign(self.bias), act=None, data_format=self.data_format, ) diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv3d.py b/python/paddle/fluid/tests/unittests/test_functional_conv3d.py index bdd8360f97..62322f8e3d 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv3d.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv3d.py @@ -19,7 +19,6 @@ import numpy as np import paddle import paddle.fluid.dygraph as dg -import paddle.fluid.initializer as I import paddle.nn.functional as F from paddle import fluid @@ -97,10 +96,10 @@ class TestFunctionalConv3D(TestCase): padding=self.padding, dilation=self.dilation, groups=self.groups, - param_attr=I.NumpyArrayInitializer(self.weight), + param_attr=paddle.nn.initializer.Assign(self.weight), bias_attr=False 
if self.no_bias - else I.NumpyArrayInitializer(self.bias), + else paddle.nn.initializer.Assign(self.bias), act=self.act, data_format=self.data_format, ) @@ -490,10 +489,10 @@ class TestFunctionalConv3DErrorCase11(TestCase): padding=self.padding, dilation=self.dilation, groups=self.groups, - param_attr=I.NumpyArrayInitializer(self.filter), + param_attr=paddle.nn.initializer.Assign(self.filter), bias_attr=False if self.bias is None - else I.NumpyArrayInitializer(self.bias), + else paddle.nn.initializer.Assign(self.bias), act=None, data_format=self.data_format, ) diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py b/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py index ae402c874e..7a8549b124 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py @@ -19,7 +19,6 @@ import numpy as np import paddle import paddle.fluid.dygraph as dg -import paddle.fluid.initializer as I import paddle.nn.functional as F from paddle import fluid @@ -99,10 +98,10 @@ class TestFunctionalConv3DTranspose(TestCase): padding=self.padding, dilation=self.dilation, groups=self.groups, - param_attr=I.NumpyArrayInitializer(self.weight), + param_attr=paddle.nn.initializer.Assign(self.weight), bias_attr=False if self.no_bias - else I.NumpyArrayInitializer(self.bias), + else paddle.nn.initializer.Assign(self.bias), act=self.act, data_format=self.data_format, ) @@ -548,10 +547,10 @@ class TestFunctionalConv3DTransposeErrorCase10(TestCase): padding=self.padding, dilation=self.dilation, groups=self.groups, - param_attr=I.NumpyArrayInitializer(self.filter), + param_attr=paddle.nn.initializer.Assign(self.filter), bias_attr=False if self.bias is None - else I.NumpyArrayInitializer(self.bias), + else paddle.nn.initializer.Assign(self.bias), act=None, data_format=self.data_format, ) diff --git a/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py index 4d7fb60d46..83574bae6b 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py @@ -37,11 +37,11 @@ class TestFuseBatchNormActPass(unittest.TestCase): ) param_attr = fluid.ParamAttr( name='batch_norm_w', - initializer=fluid.initializer.Constant(value=1.0), + initializer=paddle.nn.initializer.Constant(value=1.0), ) bias_attr = fluid.ParamAttr( name='batch_norm_b', - initializer=fluid.initializer.Constant(value=0.0), + initializer=paddle.nn.initializer.Constant(value=0.0), ) hidden2 = paddle.static.nn.batch_norm( input=hidden1, diff --git a/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py index d981ccbe14..c00f10d91d 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py @@ -31,33 +31,33 @@ class TestFusedBnAddActAPI(unittest.TestCase): def setUp(self): self.conv_param_attr1 = fluid.ParamAttr( name='conv2d_1.weight', - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) self.conv_param_attr2 = fluid.ParamAttr( name='conv2d_2.weight', - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) self.bn_param_attr1 = fluid.ParamAttr( name='batch_norm_w_1', - 
initializer=fluid.initializer.Constant(value=1.0), + initializer=paddle.nn.initializer.Constant(value=1.0), ) self.bn_bias_attr1 = fluid.ParamAttr( name='batch_norm_b_1', - initializer=fluid.initializer.Constant(value=0.0), + initializer=paddle.nn.initializer.Constant(value=0.0), ) self.bn_param_attr2 = fluid.ParamAttr( name='batch_norm_w_2', - initializer=fluid.initializer.Constant(value=1.0), + initializer=paddle.nn.initializer.Constant(value=1.0), ) self.bn_bias_attr2 = fluid.ParamAttr( name='batch_norm_b_2', - initializer=fluid.initializer.Constant(value=0.0), + initializer=paddle.nn.initializer.Constant(value=0.0), ) self.fc_param_attr = fluid.ParamAttr( name='fc.weight', - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), ) def build_fused_program( diff --git a/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py b/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py index 8068387cfd..9264c8f2e7 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py @@ -53,7 +53,7 @@ class TestFusedMultiTransformerOp(OpTest): self.__class__.no_need_check_grad = False bias_attr = paddle.fluid.ParamAttr( - initializer=paddle.fluid.initializer.Constant(value=0.0005) + initializer=paddle.nn.initializer.Constant(value=0.0005) ) self.q_proj = Linear( self.embed_dim, @@ -1027,16 +1027,16 @@ class TestFusedMultiTransformerOpPreCacheStatic(TestFusedMultiTransformerOp): self.has_attn_mask = False self.x_type = np.float32 self.weight_attr = paddle.ParamAttr( - initializer=paddle.fluid.initializer.Constant(0.0) + initializer=paddle.nn.initializer.Constant(0.0) ) self.bias_attr = paddle.ParamAttr( - initializer=paddle.fluid.initializer.Constant(0.0005) + initializer=paddle.nn.initializer.Constant(0.0005) ) self.ln_w_attr = paddle.ParamAttr( - initializer=paddle.fluid.initializer.Constant(1.0) + initializer=paddle.nn.initializer.Constant(1.0) ) self.ln_b_attr = paddle.ParamAttr( - initializer=paddle.fluid.initializer.Constant(0.0) + initializer=paddle.nn.initializer.Constant(0.0) ) def test_fused_multi_transformer_op(self): diff --git a/python/paddle/fluid/tests/unittests/test_generator_dataloader.py b/python/paddle/fluid/tests/unittests/test_generator_dataloader.py index fcbc91edee..b0625050b8 100644 --- a/python/paddle/fluid/tests/unittests/test_generator_dataloader.py +++ b/python/paddle/fluid/tests/unittests/test_generator_dataloader.py @@ -62,7 +62,7 @@ def simple_fc_net(places, use_legacy_py_reader, use_double_buffer): size=hidden_size, activation='tanh', bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ), ) diff --git a/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py b/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py index 75e5d1ee2e..abf0ba0ac2 100644 --- a/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py +++ b/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py @@ -20,7 +20,6 @@ from op_test import OpTest, skip_check_grad_ci import paddle import paddle.fluid as fluid -import paddle.fluid.initializer as I import paddle.nn.functional as F paddle.enable_static() @@ -302,7 +301,7 @@ class TestHSigmoidOpWithSparseGrad(unittest.TestCase): is_sparse=is_sparse, size=[3, 3], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Normal(scale=1 / math.sqrt(3)) +
initializer=paddle.nn.initializer.Normal(std=1 / math.sqrt(3)) ), ) @@ -555,8 +554,8 @@ class TestHSigmoidLossAPI(unittest.TestCase): x, labels, self.num_classes, weight, bias, path_table, path_code ) - weight_attr = I.NumpyArrayInitializer(self.weight_np) - bias_attr = I.NumpyArrayInitializer(self.bias_np) + weight_attr = paddle.nn.initializer.Assign(self.weight_np) + bias_attr = paddle.nn.initializer.Assign(self.bias_np) m = paddle.nn.HSigmoidLoss( self.feature_size, self.num_classes, @@ -593,10 +592,10 @@ class TestHSigmoidLossAPI(unittest.TestCase): ) weight_attr = paddle.framework.ParamAttr( - initializer=I.NumpyArrayInitializer(self.weight_np) + initializer=paddle.nn.initializer.Assign(self.weight_np) ) bias_attr = paddle.framework.ParamAttr( - initializer=I.NumpyArrayInitializer(self.bias_np) + initializer=paddle.nn.initializer.Assign(self.bias_np) ) m = paddle.nn.HSigmoidLoss( self.feature_size, @@ -636,8 +635,8 @@ class TestHSigmoidLossAPI(unittest.TestCase): if self.is_custom: path_table = fluid.data('path_table', [-1, -1], 'int64') path_code = fluid.data('path_code', [-1, -1], 'int64') - weight_attr = I.NumpyArrayInitializer(self.weight_np) - bias_attr = I.NumpyArrayInitializer(self.bias_np) + weight_attr = paddle.nn.initializer.Assign(self.weight_np) + bias_attr = paddle.nn.initializer.Assign(self.bias_np) loss = paddle.nn.HSigmoidLoss( feature_size=x.shape[1], num_classes=self.num_classes, diff --git a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py index f34c8d6a2a..51e32c5259 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py @@ -123,7 +123,7 @@ class DeepCF(fluid.Layer): shape=matrix.shape, dtype=matrix.dtype, is_bias=False, - default_initializer=fluid.initializer.NumpyArrayInitializer(matrix), + default_initializer=paddle.nn.initializer.Assign(matrix), ) self._rating_matrix.stop_gradient = True diff --git a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py index 0eb037bc6a..af6e32ac6b 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py @@ -47,7 +47,7 @@ class SimpleNet(fluid.Layer): sparse=is_sparse, weight_attr=fluid.ParamAttr( name='embedding_para', - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ), ), @@ -56,7 +56,7 @@ class SimpleNet(fluid.Layer): attr=fluid.ParamAttr(), shape=[self.vocab_size], dtype=dtype, - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py index 12be3af2d9..5c48252cb0 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py @@ -77,12 +77,12 @@ class ConvBNPool(fluid.dygraph.Layer): filter_size = 3 conv_std_0 = (2.0 / (filter_size**2 * channels[0])) ** 0.5 conv_param_0 = fluid.ParamAttr( - initializer=fluid.initializer.Normal(0.0, conv_std_0) + 
initializer=paddle.nn.initializer.Normal(0.0, conv_std_0) ) conv_std_1 = (2.0 / (filter_size**2 * channels[1])) ** 0.5 conv_param_1 = fluid.ParamAttr( - initializer=fluid.initializer.Normal(0.0, conv_std_1) + initializer=paddle.nn.initializer.Normal(0.0, conv_std_1) ) self.conv_0_layer = paddle.nn.Conv2D( @@ -200,10 +200,11 @@ class EncoderNet(fluid.dygraph.Layer): super().__init__() self.rnn_hidden_size = rnn_hidden_size para_attr = fluid.ParamAttr( - initializer=fluid.initializer.Normal(0.0, 0.02) + initializer=paddle.nn.initializer.Normal(0.0, 0.02) ) bias_attr = fluid.ParamAttr( - initializer=fluid.initializer.Normal(0.0, 0.02), learning_rate=2.0 + initializer=paddle.nn.initializer.Normal(0.0, 0.02), + learning_rate=2.0, ) if fluid.framework._non_static_mode(): h_0 = np.zeros( diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py index 6eb0c9d6e6..8917230d52 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py @@ -51,26 +51,26 @@ class SimpleLSTMRNN(fluid.Layer): for i in range(self._num_layers): weight_1 = self.create_parameter( attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ) ), shape=[self._hidden_size * 2, self._hidden_size * 4], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ), ) self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1)) bias_1 = self.create_parameter( attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ) ), shape=[self._hidden_size * 4], dtype="float32", - default_initializer=fluid.initializer.Constant(0.0), + default_initializer=paddle.nn.initializer.Constant(0.0), ) self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1)) @@ -176,7 +176,7 @@ class PtbModel(fluid.Layer): sparse=is_sparse, weight_attr=fluid.ParamAttr( name='embedding_para', - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ), ), @@ -185,7 +185,7 @@ class PtbModel(fluid.Layer): attr=fluid.ParamAttr(), shape=[self.hidden_size, self.vocab_size], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) @@ -193,7 +193,7 @@ class PtbModel(fluid.Layer): attr=fluid.ParamAttr(), shape=[self.vocab_size], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py index bc46ad12d3..2936b07303 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py @@ -226,7 +226,7 @@ class ResNet(fluid.Layer): self.pool2d_avg_output, class_dim, weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv) + initializer=paddle.nn.initializer.Uniform(-stdv, stdv) ), ) diff --git 
a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py index 7fd322d358..2ef0b8afcc 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py @@ -48,26 +48,26 @@ class SimpleLSTMRNN(fluid.Layer): for i in range(self._num_layers): weight_1 = self.create_parameter( attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ) ), shape=[self._hidden_size * 2, self._hidden_size * 4], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ), ) self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1)) bias_1 = self.create_parameter( attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ) ), shape=[self._hidden_size * 4], dtype="float32", - default_initializer=fluid.initializer.Constant(0.0), + default_initializer=paddle.nn.initializer.Constant(0.0), ) self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1)) @@ -172,7 +172,7 @@ class PtbModel(fluid.Layer): sparse=False, weight_attr=fluid.ParamAttr( name='embedding_para', - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ), ), @@ -182,7 +182,7 @@ class PtbModel(fluid.Layer): attr=fluid.ParamAttr(), shape=[self.hidden_size, self.vocab_size], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) @@ -190,7 +190,7 @@ class PtbModel(fluid.Layer): attr=fluid.ParamAttr(), shape=[self.vocab_size], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py index 647710fba6..fb833c6525 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py @@ -48,7 +48,7 @@ class SimpleNet(fluid.Layer): sparse=is_sparse, weight_attr=fluid.ParamAttr( name='embedding_para', - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ), ), @@ -57,7 +57,7 @@ class SimpleNet(fluid.Layer): attr=fluid.ParamAttr(), shape=[self.hidden_size, self.hidden_size], dtype=dtype, - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) @@ -65,7 +65,7 @@ class SimpleNet(fluid.Layer): attr=fluid.ParamAttr(), shape=[self.hidden_size], dtype=dtype, - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py 
index e171899289..46bd8890d2 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py @@ -399,10 +399,10 @@ class PrePostProcessLayer(Layer): self._layer_norm = paddle.nn.LayerNorm( normalized_shape=d_model, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(1.0) + initializer=paddle.nn.initializer.Constant(1.0) ), bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(0.0) + initializer=paddle.nn.initializer.Constant(0.0) ), ) @@ -662,7 +662,9 @@ class PrepareEncoderDecoderLayer(Layer): sparse=is_sparse, weight_attr=fluid.ParamAttr( name=word_emb_param_name, - initializer=fluid.initializer.Normal(0.0, src_emb_dim**-0.5), + initializer=paddle.nn.initializer.Normal( + 0.0, src_emb_dim**-0.5 + ), ), ) @@ -676,7 +678,7 @@ class PrepareEncoderDecoderLayer(Layer): sparse=is_sparse, weight_attr=fluid.ParamAttr( name=pos_enc_param_name, - initializer=fluid.initializer.NumpyArrayInitializer(pos_inp), + initializer=paddle.nn.initializer.Assign(pos_inp), trainable=False, ), ) diff --git a/python/paddle/fluid/tests/unittests/test_initializer.py b/python/paddle/fluid/tests/unittests/test_initializer.py index 07d9d7b48c..f87e62cb02 100644 --- a/python/paddle/fluid/tests/unittests/test_initializer.py +++ b/python/paddle/fluid/tests/unittests/test_initializer.py @@ -20,7 +20,6 @@ import numpy as np import paddle import paddle.fluid as fluid import paddle.fluid.framework as framework -import paddle.fluid.initializer as initializer from paddle.fluid.core import VarDesc from paddle.regularizer import L2Decay @@ -67,7 +66,7 @@ class TestConstantInitializer(unittest.TestCase): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.ConstantInitializer(), + initializer=paddle.nn.initializer.Constant(), ) num_ops = 1 self.assertEqual(len(block.ops), num_ops) @@ -86,7 +85,7 @@ class TestConstantInitializer(unittest.TestCase): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.ConstantInitializer(2.3), + initializer=paddle.nn.initializer.Constant(2.3), ) num_ops = 1 self.assertEqual(len(block.ops), num_ops) @@ -119,7 +118,7 @@ class TestUniformInitializer(unittest.TestCase): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.UniformInitializer(), + initializer=paddle.nn.initializer.Uniform(), ) num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) @@ -141,14 +140,14 @@ class TestUniformInitializer(unittest.TestCase): shape=[5, 10], lod_level=0, name="param1", - initializer=initializer.UniformInitializer(), + initializer=paddle.nn.initializer.Uniform(), ) block.create_parameter( dtype="float32", shape=[5, 10], lod_level=0, name="param2", - initializer=initializer.UniformInitializer(seed=456), + initializer=paddle.nn.initializer.UniformInitializer(seed=456), ) init_op = block.ops[1] self.assertEqual(init_op.attr("seed"), 456) @@ -165,7 +164,9 @@ class TestUniformInitializer(unittest.TestCase): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.UniformInitializer(-4.2, 3.1, 123), + initializer=paddle.nn.initializer.UniformInitializer( + -4.2, 3.1, 123 + ), ) num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) @@ -186,7 +187,9 @@ class TestUniformInitializer(unittest.TestCase): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.UniformInitializer(-4.2, float(i), 123), + initializer=paddle.nn.initializer.UniformInitializer( 
+ -4.2, float(i), 123 + ), ) num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) @@ -226,7 +229,7 @@ class TestNormalInitializer(unittest.TestCase): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.NormalInitializer(), + initializer=paddle.nn.initializer.Normal(), ) self.assertEqual(len(block.ops), 1) init_op = block.ops[0] @@ -245,7 +248,9 @@ class TestNormalInitializer(unittest.TestCase): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.NormalInitializer(2.3, 1.9, 123), + initializer=paddle.nn.initializer.NormalInitializer( + 2.3, 1.9, 123 + ), ) num_ops = 1 self.assertEqual(len(block.ops), num_ops) @@ -278,7 +283,7 @@ class TestXavierInitializer(unittest.TestCase): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.XavierInitializer(), + initializer=paddle.nn.initializer.XavierUniform(), ) self.assertEqual(len(block.ops), 1) init_op = block.ops[0] @@ -300,7 +305,7 @@ class TestXavierInitializer(unittest.TestCase): shape=[5, 10, 15, 20], lod_level=0, name="param", - initializer=initializer.XavierInitializer(), + initializer=paddle.nn.initializer.XavierUniform(), ) self.assertEqual(len(block.ops), 1) init_op = block.ops[0] @@ -325,7 +330,7 @@ class TestXavierInitializer(unittest.TestCase): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.XavierInitializer(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), ) self.assertEqual(len(block.ops), 1) init_op = block.ops[0] @@ -347,7 +352,7 @@ class TestXavierInitializer(unittest.TestCase): shape=[5, 10, 15, 20], lod_level=0, name="param", - initializer=initializer.XavierInitializer(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), ) self.assertEqual(len(block.ops), 1) init_op = block.ops[0] @@ -372,7 +377,7 @@ class TestXavierInitializer(unittest.TestCase): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.XavierInitializer( + initializer=paddle.nn.initializer.XavierInitializer( uniform=uniform, fan_in=12, fan_out=23, seed=134 ), ) @@ -421,7 +426,7 @@ class TestMSRAInitializer(unittest.TestCase): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.MSRAInitializer(), + initializer=paddle.nn.initializer.KaimingUniform(), ) self.assertEqual(len(block.ops), 1) init_op = block.ops[0] @@ -443,7 +448,7 @@ class TestMSRAInitializer(unittest.TestCase): shape=[5, 10, 15, 20], lod_level=0, name="param", - initializer=initializer.MSRAInitializer(), + initializer=paddle.nn.initializer.KaimingUniform(), ) self.assertEqual(len(block.ops), 1) init_op = block.ops[0] @@ -466,7 +471,7 @@ class TestMSRAInitializer(unittest.TestCase): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.MSRAInitializer(uniform=False), + initializer=paddle.nn.initializer.KaimingNormal(), ) self.assertEqual(len(block.ops), 1) init_op = block.ops[0] @@ -488,7 +493,7 @@ class TestMSRAInitializer(unittest.TestCase): shape=[5, 10, 15, 20], lod_level=0, name="param", - initializer=initializer.MSRAInitializer(uniform=False), + initializer=paddle.nn.initializer.KaimingNormal(), ) self.assertEqual(len(block.ops), 1) init_op = block.ops[0] @@ -509,7 +514,9 @@ class TestMSRAInitializer(unittest.TestCase): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.MSRAInitializer(fan_in=12, seed=134), + initializer=paddle.nn.initializer.MSRAInitializer( + fan_in=12, seed=134 + ), ) num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) @@ -542,7 +549,7 @@ class 
TestBilinearInitializer(unittest.TestCase): shape=[8, 1, 3, 3], lod_level=0, name="param", - initializer=initializer.BilinearInitializer(), + initializer=paddle.nn.initializer.Bilinear(), ) num_ops = 2 if dtype in ["float16", "uint16", "float64"] else 1 self.assertEqual(len(block.ops), num_ops) @@ -576,7 +583,7 @@ class TestBilinearInitializerDygraphAPI(unittest.TestCase): w_attr = paddle.ParamAttr( learning_rate=0.0, regularizer=L2Decay(0.0), - initializer=initializer.BilinearInitializer(), + initializer=paddle.nn.initializer.Bilinear(), ) data = paddle.rand([B, 3, H, W], dtype='float32') conv_up = paddle.nn.Conv2DTranspose( @@ -597,7 +604,7 @@ class TestBilinearInitializerDygraphAPI(unittest.TestCase): w_attr = paddle.ParamAttr( learning_rate=0.0, regularizer=L2Decay(0.0), - initializer=initializer.BilinearInitializer(), + initializer=paddle.nn.initializer.Bilinear(), ) conv2d = paddle.nn.Conv2D(1, 2, 3, weight_attr=w_attr) paddle.set_default_dtype("float32") @@ -632,7 +639,7 @@ class TestNumpyArrayInitializer(unittest.TestCase): shape=np_array.shape, lod_level=0, name="param", - initializer=initializer.NumpyArrayInitializer(np_array), + initializer=paddle.nn.initializer.Assign(np_array), ) num_ops = 2 if dtype in ["float16", "uint16"] else 1 self.assertEqual(len(block.ops), num_ops) @@ -657,7 +664,9 @@ class TestSetGlobalInitializer(unittest.TestCase): """Test Set Global Param initilizer with UniformInitializer""" main_prog = framework.Program() startup_prog = framework.Program() - fluid.set_global_initializer(initializer.Uniform(low=-0.5, high=0.5)) + fluid.set_global_initializer( + paddle.nn.initializer.Uniform(low=-0.5, high=0.5) + ) with fluid.program_guard(main_prog, startup_prog): x = fluid.data(name="x", shape=[1, 3, 32, 32]) # default initilizer of param in layers.conv2d is NormalInitializer @@ -683,8 +692,8 @@ class TestSetGlobalInitializer(unittest.TestCase): main_prog = framework.Program() startup_prog = framework.Program() fluid.set_global_initializer( - initializer.Uniform(low=-0.5, high=0.5), - bias_init=initializer.Normal(loc=0.0, scale=2.0), + paddle.nn.initializer.Uniform(low=-0.5, high=0.5), + bias_init=paddle.nn.initializer.Normal(0.0, 2.0), ) with fluid.program_guard(main_prog, startup_prog): x = fluid.data(name="x", shape=[1, 3, 32, 32]) @@ -746,9 +755,7 @@ class TestXavierInitializerDygraph(unittest.TestCase): tensor = paddle.zeros([1024, 1024, 16]) tensor.stop_gradient = False - xavier_ = paddle.fluid.initializer.XavierInitializer( - uniform=False, fan_in=3, fan_out=5 - ) + xavier_ = paddle.nn.initializer.XavierNormal(fan_in=3, fan_out=5) xavier_(tensor) hist, _ = output_hist(tensor.numpy()) @@ -771,9 +778,7 @@ class TestMSRAInitializerDygraph(unittest.TestCase): tensor = paddle.zeros([1024, 1024, 16]) tensor.stop_gradient = False - msra_ = paddle.fluid.initializer.MSRAInitializer( - uniform=False, fan_in=4 - ) + msra_ = paddle.nn.initializer.KaimingNormal(fan_in=4) msra_(tensor) hist, _ = output_hist(tensor.numpy()) @@ -1188,7 +1193,7 @@ class TestKaimingUniform(unittest.TestCase): def test_type_error(self): self.assertRaises( - ValueError, self.func_kaiminguniform_initializer_fan_in_zero + ZeroDivisionError, self.func_kaiminguniform_initializer_fan_in_zero ) diff --git a/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py b/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py index bc4ef3d386..7dcf964c41 100644 --- a/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py +++ b/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py @@ 
-34,7 +34,7 @@ def fc_with_batchnorm(use_feed): size=200, activation='tanh', bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ), ) diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 51715e2ae1..192585e6c1 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -138,7 +138,9 @@ class TestLayer(LayerTest): name='data', shape=[3, 32, 32], dtype='float32' ) linear = paddle.nn.Linear( - 32, 4, bias_attr=fluid.initializer.ConstantInitializer(value=1) + 32, + 4, + bias_attr=paddle.nn.initializer.Constant(value=1), ) ret = linear(t) static_ret = self.get_static_graph_result( @@ -147,7 +149,9 @@ class TestLayer(LayerTest): with self.dynamic_graph(): t = base.to_variable(inp) linear = paddle.nn.Linear( - 32, 4, bias_attr=fluid.initializer.ConstantInitializer(value=1) + 32, + 4, + bias_attr=paddle.nn.initializer.Constant(value=1), ) dy_ret = linear(t) dy_ret_value = dy_ret.numpy() @@ -162,7 +166,7 @@ class TestLayer(LayerTest): linear = paddle.nn.Linear( 32, 4, - bias_attr=fluid.initializer.ConstantInitializer(value=1), + bias_attr=paddle.nn.initializer.Constant(value=1), ) linear_ret1 = linear(inp) @@ -175,7 +179,7 @@ class TestLayer(LayerTest): linear = paddle.nn.Linear( 32, 4, - bias_attr=fluid.initializer.ConstantInitializer(value=1), + bias_attr=paddle.nn.initializer.Constant(value=1), ) linear_ret2 = linear(inp) @@ -248,7 +252,7 @@ class TestLayer(LayerTest): linear = paddle.nn.Linear( 32, 4, - bias_attr=fluid.initializer.ConstantInitializer(value=1), + bias_attr=paddle.nn.initializer.Constant(value=1), ) linear_ret1 = linear(inp) @@ -261,7 +265,7 @@ class TestLayer(LayerTest): linear = paddle.nn.Linear( 32, 4, - bias_attr=fluid.initializer.ConstantInitializer(value=1), + bias_attr=paddle.nn.initializer.Constant(value=1), ) linear_ret2 = linear(inp) @@ -396,7 +400,7 @@ class TestLayer(LayerTest): num_filters=10, filter_size=27, act='sigmoid', - bias_attr=fluid.initializer.ConstantInitializer(value=1), + bias_attr=paddle.nn.initializer.Constant(value=1), ) static_rlt = self.get_static_graph_result( feed={'pixel': inp_np}, fetch_list=[out] @@ -409,7 +413,7 @@ class TestLayer(LayerTest): 3, 10, 27, - bias_attr=fluid.initializer.ConstantInitializer(value=1), + bias_attr=paddle.nn.initializer.Constant(value=1), ) out = conv2d_transpose(img) out = paddle.nn.functional.sigmoid(out) @@ -421,7 +425,7 @@ class TestLayer(LayerTest): 3, 10, 27, - bias_attr=fluid.initializer.ConstantInitializer(value=1), + bias_attr=paddle.nn.initializer.Constant(value=1), ) dy_rlt = conv2d_transpose(base.to_variable(inp_np)) dy_rlt = paddle.nn.functional.sigmoid(dy_rlt) @@ -433,9 +437,7 @@ class TestLayer(LayerTest): images = np.ones([2, 3, 5, 5], dtype='float32') custom_weight = np.random.randn(3, 3, 2, 2).astype("float32") weight_attr = fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - custom_weight - ) + initializer=paddle.nn.initializer.Assign(custom_weight) ) conv2d1 = paddle.nn.Conv2DTranspose(3, 3, [2, 2]) conv2d2 = paddle.nn.Conv2DTranspose( @@ -503,7 +505,7 @@ class TestLayer(LayerTest): data_x, data_y, 6, - bias_attr=fluid.initializer.ConstantInitializer(value=1), + bias_attr=paddle.nn.initializer.Constant(value=1), act='sigmoid', ) @@ -518,7 +520,7 @@ class TestLayer(LayerTest): 3, 3, 6, - bias_attr=fluid.initializer.ConstantInitializer(value=1), + 
bias_attr=paddle.nn.initializer.Constant(value=1), ) out = btp(data_x, data_y) out = paddle.nn.functional.sigmoid(out) @@ -530,7 +532,7 @@ class TestLayer(LayerTest): 3, 3, 6, - bias_attr=fluid.initializer.ConstantInitializer(value=1), + bias_attr=paddle.nn.initializer.Constant(value=1), ) dy_rlt = btp(base.to_variable(inp_np_x), base.to_variable(inp_np_y)) dy_rlt = paddle.nn.functional.sigmoid(dy_rlt) @@ -566,9 +568,7 @@ class TestLayer(LayerTest): with self.dynamic_graph(): custom_weight = np.random.randn(6, 3, 3).astype("float32") weight_attr = fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - custom_weight - ) + initializer=paddle.nn.initializer.Assign(custom_weight) ) btp1 = paddle.nn.Bilinear(3, 3, 6) btp2 = paddle.nn.Bilinear(3, 3, 6, weight_attr=weight_attr) @@ -641,9 +641,7 @@ class TestLayer(LayerTest): with self.dynamic_graph(): custom_weight = np.random.randn(dict_size, 32).astype("float32") weight_attr = fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - custom_weight - ) + initializer=paddle.nn.initializer.Assign(custom_weight) ) emb1 = paddle.nn.Embedding(dict_size, 32, sparse=False) emb2 = paddle.nn.Embedding( @@ -741,9 +739,7 @@ class TestLayer(LayerTest): images = np.ones([2, 3, 6, 6, 6], dtype='float32') custom_weight = np.random.randn(3, 3, 2, 2, 2).astype("float32") weight_attr = fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - custom_weight - ) + initializer=paddle.nn.initializer.Assign(custom_weight) ) conv3d1 = paddle.nn.Conv3D( in_channels=3, out_channels=3, kernel_size=2 @@ -798,8 +794,8 @@ class TestLayer(LayerTest): ret = paddle.static.nn.group_norm( input=X, groups=2, - param_attr=fluid.initializer.Uniform(low=-0.5, high=0.5), - bias_attr=fluid.initializer.ConstantInitializer(value=1), + param_attr=paddle.nn.initializer.Uniform(low=-0.5, high=0.5), + bias_attr=paddle.nn.initializer.Constant(value=1), ) static_ret = self.get_static_graph_result( feed={ @@ -818,8 +814,8 @@ class TestLayer(LayerTest): groupNorm = paddle.nn.GroupNorm( num_channels=shape[1], num_groups=2, - weight_attr=fluid.initializer.Uniform(low=-0.5, high=0.5), - bias_attr=fluid.initializer.ConstantInitializer(value=1), + weight_attr=paddle.nn.initializer.Uniform(low=-0.5, high=0.5), + bias_attr=paddle.nn.initializer.Constant(value=1), ) ret = groupNorm(X) static_ret2 = self.get_static_graph_result( @@ -836,8 +832,8 @@ class TestLayer(LayerTest): groupNorm = paddle.nn.GroupNorm( num_channels=shape[1], num_groups=2, - weight_attr=fluid.initializer.Uniform(low=-0.5, high=0.5), - bias_attr=fluid.initializer.ConstantInitializer(value=1), + weight_attr=paddle.nn.initializer.Uniform(low=-0.5, high=0.5), + bias_attr=paddle.nn.initializer.Constant(value=1), ) dy_ret = groupNorm(base.to_variable(input)) dy_rlt_value = dy_ret.numpy() @@ -990,9 +986,7 @@ class TestLayer(LayerTest): images = np.ones([2, 3, 6, 6, 6], dtype='float32') custom_weight = np.random.randn(3, 3, 2, 2, 2).astype("float32") weight_attr = fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - custom_weight - ) + initializer=paddle.nn.initializer.Assign(custom_weight) ) conv3d1 = paddle.nn.Conv3DTranspose( in_channels=3, @@ -2213,13 +2207,13 @@ class TestBook(LayerTest): param_attr=fluid.ParamAttr( learning_rate=1.0, name="w_0", - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), ), bias_size=[16, 10], bias_attr=fluid.ParamAttr( learning_rate=1.0, name="b_0", - 
initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), ), act="relu", ) @@ -2238,7 +2232,7 @@ class TestBook(LayerTest): rank_param_attr=fluid.ParamAttr( learning_rate=1.0, name="ubm_rank_param.w_0", - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), ), max_rank=3, ) diff --git a/python/paddle/fluid/tests/unittests/test_linear.py b/python/paddle/fluid/tests/unittests/test_linear.py index 71f5c831ae..36496004b1 100644 --- a/python/paddle/fluid/tests/unittests/test_linear.py +++ b/python/paddle/fluid/tests/unittests/test_linear.py @@ -50,14 +50,14 @@ class LinearTestCase(unittest.TestCase): learning_rate=1.0, trainable=False, regularizer=None, - initializer=paddle.fluid.initializer.ConstantInitializer(value=1.0), + initializer=paddle.nn.initializer.Constant(value=1.0), ) bias_attr = fluid.ParamAttr( name="linear_bias", learning_rate=1.0, trainable=False, regularizer=None, - initializer=paddle.fluid.initializer.ConstantInitializer(value=1.0), + initializer=paddle.nn.initializer.Constant(value=1.0), ) linear = paddle.nn.Linear( 2, 2, weight_attr=weight_attr, bias_attr=bias_attr diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py index cc11e96f5a..649a2e5937 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py @@ -217,7 +217,7 @@ class TestEmbeddingLayerBF16ConstantInitializer(unittest.TestCase): """ def set_initializer(self): - self.initializer = fluid.initializer.Constant(value=self.value) + self.initializer = paddle.nn.initializer.Constant(value=self.value) def setUp(self): self.ids_shape = [4, 1] diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py index 8cbc6242b3..0f6affcd26 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py @@ -84,7 +84,7 @@ class TestEmbeddingLayerBF16ConstantInitializer(unittest.TestCase): """ def set_initializer(self): - self.initializer = fluid.initializer.Constant(value=self.value) + self.initializer = paddle.nn.initializer.Constant(value=self.value) def setUp(self): self.op_type = "lookup_table_v2" diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py index 74b6eec719..6aea5ef118 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py @@ -209,9 +209,7 @@ class TestLookupTableIsSparse(unittest.TestCase): param_attr=fluid.ParamAttr( name="emb_weight", learning_rate=10, - initializer=fluid.initializer.NumpyArrayInitializer( - self.w_data - ), + initializer=paddle.nn.initializer.Assign(self.w_data), ), is_sparse=is_sparse, ) diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py index a4dc9f3327..bdc4af3bdc 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py @@ -58,10 +58,10 @@ def simple_fc_net_static(): label = 
fluid.data(name='label', shape=[None, 1], dtype='int64') hidden = image param_attr = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.8) + initializer=paddle.nn.initializer.Constant(value=0.8) ) bias_attr = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5) + initializer=paddle.nn.initializer.Constant(value=0.5) ) for hidden_size in [10, 20, 30]: hidden = paddle.static.nn.fc( diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py index 24c008a602..a38c77386a 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py @@ -58,10 +58,10 @@ def simple_fc_net_static(): label = fluid.data(name='label', shape=[None, 1], dtype='int64') hidden = image param_attr = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.8) + initializer=paddle.nn.initializer.Constant(value=0.8) ) bias_attr = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5) + initializer=paddle.nn.initializer.Constant(value=0.5) ) for hidden_size in [10, 20, 30]: hidden = paddle.static.nn.fc( diff --git a/python/paddle/fluid/tests/unittests/test_nce.py b/python/paddle/fluid/tests/unittests/test_nce.py index e2923da711..80787e7fd3 100644 --- a/python/paddle/fluid/tests/unittests/test_nce.py +++ b/python/paddle/fluid/tests/unittests/test_nce.py @@ -19,7 +19,6 @@ from op_test import OpTest import paddle import paddle.fluid as fluid -import paddle.fluid.initializer as initializer from paddle.fluid import Program, program_guard @@ -199,7 +198,7 @@ class TestNCECase1SelectedRows(unittest.TestCase): shape=[num_total_classes, 10], dtype='float32', name='nce_w', - initializer=initializer.ConstantInitializer(), + initializer=paddle.nn.initializer.Constant(), ) ) b_param = ( @@ -209,7 +208,7 @@ class TestNCECase1SelectedRows(unittest.TestCase): shape=[num_total_classes, 1], dtype='float32', name='nce_b', - initializer=initializer.ConstantInitializer(), + initializer=paddle.nn.initializer.Constant(), ) ) diff --git a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py index d89af631ba..95df8aa0be 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py +++ b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py @@ -27,7 +27,7 @@ class EmbeddingStatic(unittest.TestCase): with fluid.program_guard(prog): def test_bad_x(): - initializer = fluid.initializer.NumpyArrayInitializer( + initializer = paddle.nn.initializer.Assign( np.random.random(size=(128, 100)) ) @@ -59,7 +59,7 @@ class EmbeddingStatic(unittest.TestCase): with fluid.program_guard(prog): def test_bad_x(): - initializer = fluid.initializer.NumpyArrayInitializer( + initializer = paddle.nn.initializer.Assign( np.random.random(size=(128, 100)) ) diff --git a/python/paddle/fluid/tests/unittests/test_optimizer_grad.py b/python/paddle/fluid/tests/unittests/test_optimizer_grad.py index fc5fbec82c..626521577d 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer_grad.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer_grad.py @@ -81,20 +81,20 @@ class SimpleNetWithCond: dtype="float32", shape=self.shape, attr=fluid.ParamAttr(learning_rate=self.param_lr, name="param_x"), - default_initializer=fluid.initializer.NumpyArrayInitializer(self.x), + 
default_initializer=paddle.nn.initializer.Assign(self.x), ) param_y = paddle.create_parameter( dtype="float32", shape=self.shape, attr=fluid.ParamAttr(learning_rate=self.param_lr, name="param_y"), - default_initializer=fluid.initializer.NumpyArrayInitializer(self.y), + default_initializer=paddle.nn.initializer.Assign(self.y), ) param_z = paddle.create_parameter( dtype="float32", shape=self.shape, attr=fluid.ParamAttr(learning_rate=self.param_lr, name="param_z"), - default_initializer=fluid.initializer.NumpyArrayInitializer(self.z), + default_initializer=paddle.nn.initializer.Assign(self.z), ) sum_xy = paddle.add(param_x, param_y, name='sum_xy') diff --git a/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py b/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py index 3b32c9ca4e..ab9b99d8cb 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py @@ -51,10 +51,10 @@ def static( size=FC_SIZE, activation='relu', weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99) + initializer=paddle.nn.initializer.Constant(value=0.99) ), bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5) + initializer=paddle.nn.initializer.Constant(value=0.5) ), name="hidden", ) @@ -64,10 +64,10 @@ def static( size=CLASS_NUM, activation='softmax', weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.2) + initializer=paddle.nn.initializer.Constant(value=1.2) ), bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.8) + initializer=paddle.nn.initializer.Constant(value=0.8) ), name="prediction", ) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py index 850ddc379c..1f6429620f 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py @@ -33,7 +33,7 @@ def simple_fc_net(use_feed): size=200, activation='tanh', bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ), ) prediction = paddle.static.nn.fc(hidden, size=10, activation='softmax') @@ -56,7 +56,7 @@ def fc_with_batchnorm(use_feed): size=200, activation='tanh', bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ), ) diff --git a/python/paddle/fluid/tests/unittests/test_parameter.py b/python/paddle/fluid/tests/unittests/test_parameter.py index 5ce6f31318..909feb2a48 100644 --- a/python/paddle/fluid/tests/unittests/test_parameter.py +++ b/python/paddle/fluid/tests/unittests/test_parameter.py @@ -23,7 +23,6 @@ import paddle.fluid.io as io from paddle.fluid.dygraph import guard from paddle.fluid.executor import Executor from paddle.fluid.framework import ParamBase, Variable, default_main_program -from paddle.fluid.initializer import ConstantInitializer paddle.enable_static() main_program = default_main_program() @@ -38,7 +37,7 @@ class ParameterChecks(unittest.TestCase): name='fc.w', shape=shape, dtype='float32', - initializer=ConstantInitializer(val), + initializer=paddle.nn.initializer.Constant(val), ) self.assertIsNotNone(param) self.assertEqual('fc.w', param.name) diff --git a/python/paddle/fluid/tests/unittests/test_prelu_op.py 
b/python/paddle/fluid/tests/unittests/test_prelu_op.py index 9c95d5b946..4a4d5921bb 100644 --- a/python/paddle/fluid/tests/unittests/test_prelu_op.py +++ b/python/paddle/fluid/tests/unittests/test_prelu_op.py @@ -153,7 +153,7 @@ class TestNNPReluAPI(unittest.TestCase): x = paddle.to_tensor(self.x_np) m = paddle.nn.PReLU( weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(0.5) + initializer=paddle.nn.initializer.Constant(0.5) ) ) out = m(x) @@ -438,7 +438,7 @@ def prelu_t(x, mode, param_attr=None, name=None, data_format='NCHW'): shape=alpha_shape, dtype='float32', is_bias=False, - default_initializer=fluid.initializer.ConstantInitializer(0.25), + default_initializer=paddle.nn.initializer.Constant(0.25), ) out = helper.create_variable_for_type_inference(dtype) helper.append_op( diff --git a/python/paddle/fluid/tests/unittests/test_program_prune_backward.py b/python/paddle/fluid/tests/unittests/test_program_prune_backward.py index 5364dcaa6e..885c8fa829 100755 --- a/python/paddle/fluid/tests/unittests/test_program_prune_backward.py +++ b/python/paddle/fluid/tests/unittests/test_program_prune_backward.py @@ -40,7 +40,7 @@ def simple_fc_net_with_accuracy(use_feed): size=200, activation='relu', bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ), ) prediction = paddle.static.nn.fc(hidden, size=10, activation='softmax') diff --git a/python/paddle/fluid/tests/unittests/test_prune.py b/python/paddle/fluid/tests/unittests/test_prune.py index 30e3aefe0a..a93516da41 100644 --- a/python/paddle/fluid/tests/unittests/test_prune.py +++ b/python/paddle/fluid/tests/unittests/test_prune.py @@ -170,7 +170,7 @@ class TestExecutorRunAutoPrune(unittest.TestCase): w_param_attrs = fluid.ParamAttr( name="fc_weight", learning_rate=0.5, - initializer=fluid.initializer.Constant(1.0), + initializer=paddle.nn.initializer.Constant(1.0), trainable=True, ) y = paddle.static.nn.fc( @@ -198,13 +198,13 @@ class TestExecutorRunAutoPrune(unittest.TestCase): w1_param_attrs = fluid.ParamAttr( name="fc_weight1", learning_rate=0.5, - initializer=fluid.initializer.Constant(1.0), + initializer=paddle.nn.initializer.Constant(1.0), trainable=True, ) w2_param_attrs = fluid.ParamAttr( name="fc_weight2", learning_rate=0.5, - initializer=fluid.initializer.Constant(1.0), + initializer=paddle.nn.initializer.Constant(1.0), trainable=True, ) y1 = paddle.static.nn.fc( diff --git a/python/paddle/fluid/tests/unittests/test_py_func_op.py b/python/paddle/fluid/tests/unittests/test_py_func_op.py index a90e37a475..526e08e9d5 100644 --- a/python/paddle/fluid/tests/unittests/test_py_func_op.py +++ b/python/paddle/fluid/tests/unittests/test_py_func_op.py @@ -79,7 +79,7 @@ def simple_fc_net(img, label, use_py_func_op): hidden, size=200, bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ), ) if not use_py_func_op: diff --git a/python/paddle/fluid/tests/unittests/test_random_seed.py b/python/paddle/fluid/tests/unittests/test_random_seed.py index 856b2be783..0798fa8864 100644 --- a/python/paddle/fluid/tests/unittests/test_random_seed.py +++ b/python/paddle/fluid/tests/unittests/test_random_seed.py @@ -378,15 +378,15 @@ class TestGeneratorSeed(unittest.TestCase): result_1 = paddle.static.nn.fc( x, size=10, - weight_attr=fluid.initializer.TruncatedNormal( - loc=0.0, scale=2.0 + weight_attr=paddle.nn.initializer.TruncatedNormal( + mean=0.0, std=2.0 ), ) result_2 = 
paddle.static.nn.fc( x, size=10, - weight_attr=fluid.initializer.TruncatedNormal( - loc=0.0, scale=2.0 + weight_attr=paddle.nn.initializer.TruncatedNormal( + mean=0.0, std=2.0 ), ) diff --git a/python/paddle/fluid/tests/unittests/test_recurrent_op.py b/python/paddle/fluid/tests/unittests/test_recurrent_op.py index 4ea5ed0e0d..8991b14384 100644 --- a/python/paddle/fluid/tests/unittests/test_recurrent_op.py +++ b/python/paddle/fluid/tests/unittests/test_recurrent_op.py @@ -301,7 +301,7 @@ class RecurrentOpTest2(RecurrentOpTest1): size=self.input_dim, weight_attr=ParamAttr( name='W', - initializer=fluid.initializer.ConstantInitializer(1.0), + initializer=paddle.nn.initializer.Constant(1.0), ), bias_attr=False, ) @@ -310,7 +310,7 @@ class RecurrentOpTest2(RecurrentOpTest1): size=self.input_dim, weight_attr=ParamAttr( name='U', - initializer=fluid.initializer.ConstantInitializer(0.0), + initializer=paddle.nn.initializer.Constant(0.0), ), bias_attr=False, ) @@ -686,7 +686,7 @@ class RecurrentOpStopGradientTest(RecurrentOpTest1): size=self.input_dim, weight_attr=ParamAttr( name="W", - initializer=fluid.initializer.ConstantInitializer(1.0), + initializer=paddle.nn.initializer.Constant(1.0), ), bias_attr=False, ) @@ -695,7 +695,7 @@ class RecurrentOpStopGradientTest(RecurrentOpTest1): size=self.input_dim, weight_attr=ParamAttr( name="U", - initializer=fluid.initializer.ConstantInitializer(0.0), + initializer=paddle.nn.initializer.Constant(0.0), ), bias_attr=False, ) diff --git a/python/paddle/fluid/tests/unittests/test_row_conv_op.py b/python/paddle/fluid/tests/unittests/test_row_conv_op.py index d160a99825..408a5f8a74 100644 --- a/python/paddle/fluid/tests/unittests/test_row_conv_op.py +++ b/python/paddle/fluid/tests/unittests/test_row_conv_op.py @@ -197,7 +197,7 @@ class TestRowConvLayer(unittest.TestCase): out = paddle.static.nn.row_conv( x, self.context_length, - param_attr=fluid.initializer.NumpyArrayInitializer(self.w), + param_attr=paddle.nn.initializer.Assign(self.w), ) place = fluid.CPUPlace() exe = fluid.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/test_run_program_op.py b/python/paddle/fluid/tests/unittests/test_run_program_op.py index fe012ded39..73ad833a3e 100644 --- a/python/paddle/fluid/tests/unittests/test_run_program_op.py +++ b/python/paddle/fluid/tests/unittests/test_run_program_op.py @@ -403,7 +403,7 @@ class TestRunProgramOpWithFC(RunProgramOpTest): weight_attr = fluid.ParamAttr( name=self.input_names['Params'][0], learning_rate=0.5, - initializer=fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( self.inputs['Params'][self.input_names['Params'][0]] ), trainable=True, @@ -411,7 +411,7 @@ class TestRunProgramOpWithFC(RunProgramOpTest): bias_attr = fluid.ParamAttr( name=self.input_names['Params'][1], learning_rate=0.5, - initializer=fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( self.inputs['Params'][self.input_names['Params'][1]] ), trainable=True, @@ -469,7 +469,7 @@ class TestRunProgramOpWithEmbedding(RunProgramOpTest): param_attr=fluid.ParamAttr( name="emb_weight", learning_rate=10, - initializer=fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( self.inputs['Params'][self.input_names['Params'][0]] ), ), diff --git a/python/paddle/fluid/tests/unittests/test_set_bool_attr.py b/python/paddle/fluid/tests/unittests/test_set_bool_attr.py index c599f08ae2..3424d39395 100644 --- a/python/paddle/fluid/tests/unittests/test_set_bool_attr.py +++ 
b/python/paddle/fluid/tests/unittests/test_set_bool_attr.py @@ -26,11 +26,11 @@ class TestAttrSet(unittest.TestCase): ) param_attr = fluid.ParamAttr( name='batch_norm_w', - initializer=fluid.initializer.Constant(value=1.0), + initializer=paddle.nn.initializer.Constant(value=1.0), ) bias_attr = fluid.ParamAttr( name='batch_norm_b', - initializer=fluid.initializer.Constant(value=0.0), + initializer=paddle.nn.initializer.Constant(value=0.0), ) bn = paddle.static.nn.batch_norm( input=x, param_attr=param_attr, bias_attr=bias_attr diff --git a/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py b/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py index 89515c931c..c63be2c6f2 100644 --- a/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py +++ b/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py @@ -322,7 +322,7 @@ class TestSGDOpBF16API(unittest.TestCase): print(e) def _set_initializer(self): - self.initializer = fluid.initializer.Constant(value=self.value) + self.initializer = paddle.nn.initializer.Constant(value=self.value) def _data_reader(self): for sample in range(self.sample_count): diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py index d043e3785c..0ac2644d90 100644 --- a/python/paddle/fluid/tests/unittests/test_static_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py @@ -59,26 +59,26 @@ class SimpleLSTMRNN(fluid.Layer): for i in range(self._num_layers): weight_1 = self.create_parameter( attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ) ), shape=[self._hidden_size * 2, self._hidden_size * 4], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ), ) self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1)) bias_1 = self.create_parameter( attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ) ), shape=[self._hidden_size * 4], dtype="float32", - default_initializer=fluid.initializer.Constant(0.0), + default_initializer=paddle.nn.initializer.Constant(0.0), ) self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1)) @@ -184,7 +184,7 @@ class PtbModel(fluid.Layer): embedding_dim=hidden_size, weight_attr=fluid.ParamAttr( name='embedding_para', - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ), ), @@ -193,7 +193,7 @@ class PtbModel(fluid.Layer): attr=fluid.ParamAttr(), shape=[self.hidden_size, self.vocab_size], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) @@ -201,7 +201,7 @@ class PtbModel(fluid.Layer): attr=fluid.ParamAttr(), shape=[self.vocab_size], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) diff --git a/python/paddle/fluid/tests/unittests/test_tdm_child_op.py b/python/paddle/fluid/tests/unittests/test_tdm_child_op.py index 2481a48f01..eaa1397146 100644 --- a/python/paddle/fluid/tests/unittests/test_tdm_child_op.py +++ 
b/python/paddle/fluid/tests/unittests/test_tdm_child_op.py @@ -151,9 +151,7 @@ class TestTDMChildShape(unittest.TestCase): node_nums=26, child_nums=2, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - tree_info_np - ) + initializer=paddle.nn.initializer.Assign(tree_info_np) ), ) diff --git a/python/paddle/fluid/tests/unittests/test_tdm_sampler_op.py b/python/paddle/fluid/tests/unittests/test_tdm_sampler_op.py index 217d84b4b9..c54c6c0c9d 100644 --- a/python/paddle/fluid/tests/unittests/test_tdm_sampler_op.py +++ b/python/paddle/fluid/tests/unittests/test_tdm_sampler_op.py @@ -290,12 +290,10 @@ class TestTDMSamplerShape(unittest.TestCase): layer_node_num_list, leaf_node_num, tree_travel_attr=fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - travel_array - ) + initializer=paddle.nn.initializer.Assign(travel_array) ), tree_layer_attr=fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(layer_array) + initializer=paddle.nn.initializer.Assign(layer_array) ), output_positive=True, output_list=True, diff --git a/python/paddle/fluid/tests/unittests/test_uniform_random_op.py b/python/paddle/fluid/tests/unittests/test_uniform_random_op.py index c31d763dbf..407d70b4da 100644 --- a/python/paddle/fluid/tests/unittests/test_uniform_random_op.py +++ b/python/paddle/fluid/tests/unittests/test_uniform_random_op.py @@ -286,7 +286,7 @@ class TestUniformRandomOpApi(unittest.TestCase): y = paddle.static.nn.fc( x, size=16, - weight_attr=fluid.initializer.Uniform( + weight_attr=paddle.nn.initializer.UniformInitializer( low=-0.5, high=0.5, seed=10, diff --git a/python/paddle/fluid/tests/unittests/test_weight_normalization.py b/python/paddle/fluid/tests/unittests/test_weight_normalization.py index 17a05bdb01..f649fe1a28 100644 --- a/python/paddle/fluid/tests/unittests/test_weight_normalization.py +++ b/python/paddle/fluid/tests/unittests/test_weight_normalization.py @@ -20,7 +20,6 @@ import numpy as np import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid.initializer import ConstantInitializer from paddle.fluid.param_attr import WeightNormParamAttr @@ -44,7 +43,7 @@ class TestWeightNormalization(unittest.TestCase): weight_attr=WeightNormParamAttr( dim=None, name='weight_norm_param', - initializer=ConstantInitializer(1.0), + initializer=paddle.nn.initializer.Constant(1.0), ), bias_attr=False, activation=None, diff --git a/python/paddle/fluid/tests/unittests/transformer_model.py b/python/paddle/fluid/tests/unittests/transformer_model.py index 42436b6e24..d847ac9ee4 100644 --- a/python/paddle/fluid/tests/unittests/transformer_model.py +++ b/python/paddle/fluid/tests/unittests/transformer_model.py @@ -76,8 +76,8 @@ def multi_head_attention( q = paddle.static.nn.fc( x=queries, size=d_key * n_head, - weight_attr=fluid.initializer.Xavier( - uniform=False, fan_in=d_model * d_key, fan_out=n_head * d_key + weight_attr=paddle.nn.initializer.XavierNormal( + fan_in=d_model * d_key, fan_out=n_head * d_key ), bias_attr=False, num_flatten_dims=2, @@ -85,8 +85,8 @@ def multi_head_attention( k = paddle.static.nn.fc( x=keys, size=d_key * n_head, - weight_attr=fluid.initializer.Xavier( - uniform=False, fan_in=d_model * d_key, fan_out=n_head * d_key + weight_attr=paddle.nn.initializer.XavierNormal( + fan_in=d_model * d_key, fan_out=n_head * d_key ), bias_attr=False, num_flatten_dims=2, @@ -94,8 +94,7 @@ def multi_head_attention( v = paddle.static.nn.fc( x=values, size=d_value * n_head, - 
weight_attr=fluid.initializer.Xavier( - uniform=False, + weight_attr=paddle.nn.initializer.XavierNormal( fan_in=d_model * d_value, fan_out=n_head * d_value, ), @@ -187,7 +186,7 @@ def multi_head_attention( proj_out = paddle.static.nn.fc( x=out, size=d_model, - weight_attr=fluid.initializer.Xavier(uniform=False), + weight_attr=paddle.nn.initializer.XavierNormal(), bias_attr=False, num_flatten_dims=2, ) @@ -204,7 +203,7 @@ def positionwise_feed_forward(x, d_inner_hid, d_hid): x, size=d_inner_hid, num_flatten_dims=2, - weight_attr=fluid.initializer.Uniform( + weight_attr=paddle.nn.initializer.Uniform( low=-(d_hid**-0.5), high=(d_hid**-0.5) ), activation="relu", @@ -213,7 +212,7 @@ def positionwise_feed_forward(x, d_inner_hid, d_hid): x=hidden, size=d_hid, num_flatten_dims=2, - weight_attr=fluid.initializer.Uniform( + weight_attr=paddle.nn.initializer.Uniform( low=-(d_inner_hid**-0.5), high=(d_inner_hid**-0.5) ), ) @@ -235,8 +234,8 @@ def pre_post_process_layer(prev_out, out, process_cmd, dropout=0.0): out = paddle.static.nn.layer_norm( out, begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.initializer.Constant(1.0), - bias_attr=fluid.initializer.Constant(0.0), + param_attr=paddle.nn.initializer.Constant(1.0), + bias_attr=paddle.nn.initializer.Constant(0.0), ) elif cmd == "d": # add dropout if dropout: @@ -269,7 +268,7 @@ def prepare_encoder( src_word, size=[src_vocab_size, src_emb_dim], padding_idx=src_pad_idx, - param_attr=fluid.initializer.Normal(0.0, 1.0), + param_attr=paddle.nn.initializer.Normal(0.0, 1.0), ) src_pos_enc = layers.embedding( src_pos, @@ -587,7 +586,7 @@ def transformer( x=paddle.static.nn.fc( x=dec_output, size=trg_vocab_size, - weight_attr=fluid.initializer.Xavier(uniform=False), + weight_attr=paddle.nn.initializer.XavierNormal(), bias_attr=False, num_flatten_dims=2, ), diff --git a/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py index d3909193cd..3ee0469b61 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py @@ -369,7 +369,7 @@ class XPUTestBatchNormOp(XPUOpTestWrapper): net1 = paddle.nn.BatchNorm( 6, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(1.0) + initializer=paddle.nn.initializer.Constant(1.0) ), use_global_stats=self.use_global_stats, trainable_statistics=self.trainable_statistics, diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py index 3518083d75..1764400403 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py @@ -73,34 +73,34 @@ class XPUTestResNetBasicBlockOp(XPUOpTestWrapper): paddle.disable_static() conv1_weight = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) conv2_weight = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) conv3_weight = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) bn1_weight = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + 
initializer=paddle.nn.initializer.Constant(value=1.0) ) bn1_bias = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.0) + initializer=paddle.nn.initializer.Constant(value=0.0) ) bn2_weight = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ) bn2_bias = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.0) + initializer=paddle.nn.initializer.Constant(value=0.0) ) bn3_weight = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ) bn3_bias = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.0) + initializer=paddle.nn.initializer.Constant(value=0.0) ) self.conv1 = nn.Conv2D( @@ -173,34 +173,34 @@ class XPUTestResNetBasicBlockOp(XPUOpTestWrapper): paddle.disable_static() fused_conv1_weight = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) fused_conv2_weight = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) fused_conv3_weight = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) fused_bn1_weight = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ) fused_bn1_bias = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.0) + initializer=paddle.nn.initializer.Constant(value=0.0) ) fused_bn2_weight = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ) fused_bn2_bias = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.0) + initializer=paddle.nn.initializer.Constant(value=0.0) ) fused_bn3_weight = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ) fused_bn3_bias = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.0) + initializer=paddle.nn.initializer.Constant(value=0.0) ) if self.has_shortcut: diff --git a/python/paddle/fluid/tests/unittests/xpu/test_prelu_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_prelu_op_xpu.py index 2f699ca3c0..666c29f7fc 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_prelu_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_prelu_op_xpu.py @@ -163,7 +163,7 @@ def prelu_t(x, mode, param_attr=None, name=None, data_format='NCHW'): shape=alpha_shape, dtype='float32', is_bias=False, - default_initializer=fluid.initializer.ConstantInitializer(0.25), + default_initializer=paddle.nn.initializer.Constant(0.25), ) out = helper.create_variable_for_type_inference(dtype) helper.append_op( diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index 29901363db..32486a8dad 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -39,7 +39,7 @@ import logging import numpy as np from .ps_dispatcher import RoundRobin, PSDispatcher -from .. import core, framework, unique_name, initializer +from .. 
import core, framework, unique_name from ..framework import ( Program, default_main_program, @@ -2856,7 +2856,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler dtype=var.dtype, shape=var.shape, persistable=var.persistable, - initializer=initializer.Constant(1), + initializer=paddle.nn.initializer.Constant(1), ) op_role_attr_name = ( core.op_proto_and_checker_maker.kOpRoleAttrName() diff --git a/python/paddle/incubate/asp/asp.py b/python/paddle/incubate/asp/asp.py index df1c81bffe..7bf04dc151 100644 --- a/python/paddle/incubate/asp/asp.py +++ b/python/paddle/incubate/asp/asp.py @@ -24,7 +24,6 @@ import numpy as np import paddle from paddle.fluid import core, global_scope, program_guard from paddle.fluid.framework import dygraph_only -from paddle.fluid.initializer import ConstantInitializer from paddle.incubate import asp from .supported_layer_list import ( @@ -882,7 +881,9 @@ class ASPHelper: name=ASPHelper._get_mask_name(param.name), shape=param.shape, dtype=param.dtype, - default_initializer=ConstantInitializer(value=1.0), + default_initializer=paddle.nn.initializer.Constant( + value=1.0 + ), ) mask_param.stop_gradient = True mask_param.trainable = False diff --git a/python/paddle/nn/decode.py b/python/paddle/nn/decode.py index 4ad7207701..4ce504d8f8 100644 --- a/python/paddle/nn/decode.py +++ b/python/paddle/nn/decode.py @@ -19,8 +19,8 @@ import warnings import numpy as np import paddle +from paddle.common_ops_import import default_main_program from paddle.framework import _non_static_mode -from paddle.static import default_main_program from ..fluid.data_feeder import convert_dtype from ..fluid.layers.utils import flatten, map_structure diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index 57a1e0023d..d8777d2c47 100644 --- a/python/paddle/nn/functional/common.py +++ b/python/paddle/nn/functional/common.py @@ -16,10 +16,10 @@ import numpy import paddle from paddle import _C_ops, _legacy_C_ops +from paddle.common_ops_import import Variable, default_main_program from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layers.tensor import fill_constant from paddle.framework import core, in_dynamic_mode -from paddle.static import Variable, default_main_program from paddle.tensor.creation import full from ...fluid.data_feeder import ( diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index 74a97e2593..82d25747ea 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -22,6 +22,7 @@ from paddle.device import ( from paddle.fluid.framework import _global_flags, in_dygraph_mode from paddle.tensor.math import _add_with_axis +from ...common_ops_import import Variable from ...device import get_cudnn_version from ...fluid.data_feeder import check_dtype, check_variable_and_dtype from ...fluid.layer_helper import LayerHelper @@ -32,7 +33,6 @@ from ...fluid.layers.utils import ( convert_to_list, ) from ...framework import no_grad -from ...static import Variable from ...tensor.manipulation import squeeze, unsqueeze __all__ = [] diff --git a/python/paddle/nn/functional/extension.py b/python/paddle/nn/functional/extension.py index 3b566b3de3..533bf138a1 100644 --- a/python/paddle/nn/functional/extension.py +++ b/python/paddle/nn/functional/extension.py @@ -18,6 +18,7 @@ import numpy as np from paddle import _C_ops, _legacy_C_ops, in_dynamic_mode +from ...common_ops_import import Variable from ...fluid.data_feeder import ( check_dtype, 
check_type, @@ -26,7 +27,6 @@ from ...fluid.data_feeder import ( from ...fluid.framework import in_dygraph_mode from ...fluid.layer_helper import LayerHelper from ...framework import convert_np_dtype_to_dtype_, core -from ...static import Variable from ...tensor.creation import assign from ...tensor.layer_function_generator import templatedoc diff --git a/python/paddle/nn/functional/input.py b/python/paddle/nn/functional/input.py index 8964b69df2..eccaffcb72 100644 --- a/python/paddle/nn/functional/input.py +++ b/python/paddle/nn/functional/input.py @@ -14,10 +14,10 @@ from paddle import _C_ops +from ...common_ops_import import Variable from ...fluid.data_feeder import check_variable_and_dtype from ...fluid.framework import in_dygraph_mode from ...fluid.layer_helper import LayerHelper -from ...static import Variable __all__ = [] diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index 90697cb634..001efd74a6 100644 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -20,10 +20,10 @@ from paddle import _C_ops, _legacy_C_ops, in_dynamic_mode from paddle.framework import core from paddle.utils import deprecated +from ...common_ops_import import Variable from ...fluid.data_feeder import check_variable_and_dtype from ...fluid.framework import _current_expected_place, in_dygraph_mode from ...fluid.layer_helper import LayerHelper -from ...static import Variable from ...tensor.manipulation import reshape __all__ = [] diff --git a/python/paddle/nn/functional/vision.py b/python/paddle/nn/functional/vision.py index 4f164e991f..1178928acc 100644 --- a/python/paddle/nn/functional/vision.py +++ b/python/paddle/nn/functional/vision.py @@ -15,10 +15,10 @@ from paddle import _C_ops, _legacy_C_ops, in_dynamic_mode from paddle.fluid.framework import in_dygraph_mode +from ...common_ops_import import Variable from ...device import get_cudnn_version, is_compiled_with_rocm from ...fluid.data_feeder import check_variable_and_dtype from ...fluid.layer_helper import LayerHelper -from ...static import Variable __all__ = [] diff --git a/python/paddle/nn/initializer/Bilinear.py b/python/paddle/nn/initializer/Bilinear.py new file mode 100644 index 0000000000..b3a1766d07 --- /dev/null +++ b/python/paddle/nn/initializer/Bilinear.py @@ -0,0 +1,182 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np + +from paddle import _C_ops + +from ...fluid import core, framework, unique_name +from ...fluid.framework import _current_expected_place, in_dygraph_mode +from .initializer import Initializer + +__all__ = [] + + +class Bilinear(Initializer): + """ + This initializer can be used in transposed convolution operator to + act as upsampling. Users can upsample a feature map with shape of + (B, C, H, W) by any integer factor. The usage is: + + Examples: + + .. 
code-block:: python + + import math + + import paddle + import paddle.nn as nn + from paddle.regularizer import L2Decay + + factor = 2 + C = 2 + B = 8 + H = W = 32 + w_attr = paddle.ParamAttr(learning_rate=0., + regularizer=L2Decay(0.), + initializer=nn.initializer.Bilinear()) + data = paddle.rand([B, 3, H, W], dtype='float32') + conv_up = nn.Conv2DTranspose(3, + out_channels=C, + kernel_size=2 * factor - factor % 2, + padding=int( + math.ceil((factor - 1) / 2.)), + stride=factor, + weight_attr=w_attr, + bias_attr=False) + x = conv_up(data) + + Where, `out_channels=C` and `groups=C` means this is channel-wise transposed + convolution. The filter shape will be (C, 1, K, K) where K is `kernel_size`, + This initializer will set a (K, K) interpolation kernel for every channel + of the filter identically. The resulting shape of the output feature map + will be (B, C, factor * H, factor * W). Note that the learning rate and the + weight decay are set to 0 in order to keep coefficient values of bilinear + interpolation unchanged during training. + + """ + + def __init__(self): + """Constructor for BilinearInitializer.""" + super().__init__() + + def forward(self, var, block=None): + """Initialize the input tensor with Bilinear initialization. + + Args: + var(Tensor): Tensor that needs to be initialized. + block(Block, optional): The block in which initialization ops + should be added. Used in static graph only, default None. + + Returns: + The initialization op + """ + block = self._check_block(block) + + if not isinstance(var, framework.Variable): + raise ValueError("var must be framework.Variable.") + + if not isinstance(block, framework.Block): + raise ValueError("block must be framework.Block.") + + shape = var.shape + if len(shape) != 4: + raise ValueError("the length of shape must be 4.") + if shape[2] != shape[3]: + raise ValueError("shape[2] must be equal to shape[3].") + + weight = np.zeros(np.prod(var.shape), dtype='float32') + size = shape[3] + # factor + f = np.ceil(size / 2.0) + # center + c = (2 * f - 1 - f % 2) / (2.0 * f) + for i in range(np.prod(shape)): + x = i % size + y = (i / size) % size + weight[i] = (1 - abs(x / f - c)) * (1 - abs(y / f - c)) + weight = np.reshape(weight, shape) + + # to be compatible of fp16 initalizers + if var.dtype in [ + core.VarDesc.VarType.FP16, + core.VarDesc.VarType.BF16, + core.VarDesc.VarType.FP64, + ]: + out_dtype = core.VarDesc.VarType.FP32 + out_var = block.create_var( + name=unique_name.generate( + ".".join(['bilinear_init', var.name, 'tmp']) + ), + shape=var.shape, + dtype=out_dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + ) + else: + out_dtype = var.dtype + out_var = var + + if out_dtype == core.VarDesc.VarType.FP32: + value_name = "fp32_values" + values = [float(v) for v in weight.flat] + else: + raise TypeError("Unsupported dtype %s", var.dtype) + + if np.prod(shape) > 1024 * 1024: + raise ValueError("The size of input is too big. 
") + + if in_dygraph_mode(): + _C_ops.assign_value_( + out_var, + list(shape), + out_dtype, + values, + _current_expected_place(), + ) + if var.dtype in [ + core.VarDesc.VarType.FP16, + core.VarDesc.VarType.BF16, + core.VarDesc.VarType.FP64, + ]: + var_tmp = _C_ops.cast(out_var, var.dtype) + var_tmp._share_underline_tensor_to(var) + else: + out_var._share_underline_tensor_to(var) + return None + else: + op = block.append_op( + type='assign_value', + outputs={'Out': [out_var]}, + attrs={ + 'dtype': out_dtype, + 'shape': list(shape), + value_name: values, + }, + ) + + if var.dtype in [ + core.VarDesc.VarType.FP16, + core.VarDesc.VarType.BF16, + core.VarDesc.VarType.FP64, + ]: + block.append_op( + type="cast", + inputs={"X": out_var}, + outputs={"Out": var}, + attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}, + ) + + var.op = op + return op diff --git a/python/paddle/nn/initializer/__init__.py b/python/paddle/nn/initializer/__init__.py index e078e19ed2..6ef516c8b6 100644 --- a/python/paddle/nn/initializer/__init__.py +++ b/python/paddle/nn/initializer/__init__.py @@ -13,9 +13,9 @@ # limitations under the License. # TODO: define the initializers to create a Parameter in neural network -from ...fluid.initializer import Bilinear # noqa: F401 from ...fluid.initializer import set_global_initializer # noqa: F401 -from ...fluid.initializer import calculate_gain # noqa: F401 + +from .Bilinear import Bilinear # noqa: F401 from .constant import Constant # noqa: F401 @@ -36,6 +36,15 @@ from .orthogonal import Orthogonal # noqa: F401 from .dirac import Dirac # noqa: F401 +from .initializer import Initializer, calculate_gain # noqa: F401 +from .uniform import UniformInitializer # noqa: F401 +from .constant import ConstantInitializer # noqa: F401 +from .normal import NormalInitializer # noqa: F401 +from .normal import TruncatedNormalInitializer # noqa: F401 +from .xavier import XavierInitializer # noqa: F401 +from .kaiming import MSRAInitializer # noqa: F401 +from .assign import NumpyArrayInitializer # noqa: F401 + __all__ = [ # noqa 'Bilinear', 'Constant', diff --git a/python/paddle/nn/initializer/assign.py b/python/paddle/nn/initializer/assign.py index 052da37af2..3ab5a896e4 100644 --- a/python/paddle/nn/initializer/assign.py +++ b/python/paddle/nn/initializer/assign.py @@ -12,20 +12,134 @@ # See the License for the specific language governing permissions and # limitations under the License. import paddle +from paddle import _C_ops +from ...fluid import core, framework, unique_name from ...fluid.data_feeder import check_type -from ...fluid.initializer import NumpyArrayInitializer +from ...fluid.framework import _current_expected_place, in_dygraph_mode +from .initializer import Initializer __all__ = [] +class NumpyArrayInitializer(Initializer): + """Init an parameter with an numpy array + This api initialize the tensor by numpy array. + + Args: + value (numpy): numpy array to initialize the tensor + + Returns: + A Tensor initialized by numpy. + + """ + + def __init__(self, value): + import numpy + + assert isinstance(value, numpy.ndarray) + super().__init__() + self._value = value + + def forward(self, var, block=None): + """Initialize the input tensor with Numpy array. + + Args: + var(Tensor): Tensor that needs to be initialized. + block(Block, optional): The block in which initialization ops + should be added. Used in static graph only, default None. 
+ + Returns: + The initialization op + """ + block = self._check_block(block) + + assert isinstance(var, framework.Variable) + assert isinstance(block, framework.Block) + + # to be compatible of fp16 initalizers + if var.dtype in [core.VarDesc.VarType.FP16, core.VarDesc.VarType.BF16]: + out_dtype = core.VarDesc.VarType.FP32 + np_value = self._value.astype("float32") + out_var = block.create_var( + name=unique_name.generate( + ".".join(['numpy_array_init', var.name, 'tmp']) + ), + shape=var.shape, + dtype=out_dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + ) + else: + out_var = var + out_dtype = var.dtype + np_value = self._value + + if out_dtype == core.VarDesc.VarType.FP32: + value_name = "fp32_values" + values = [float(v) for v in np_value.flat] + elif out_dtype == core.VarDesc.VarType.INT32: + value_name = "int32_values" + values = [int(v) for v in np_value.flat] + else: + raise ValueError("Unsupported dtype %s", self._value.dtype) + if self._value.size > 1024 * 1024 * 1024: + raise ValueError( + "The size of input is too big. Please consider " + "saving it to file and 'load_op' to load it" + ) + + if in_dygraph_mode(): + _C_ops.assign_value_( + out_var, + list(self._value.shape), + out_dtype, + values, + _current_expected_place(), + ) + if var.dtype in [ + core.VarDesc.VarType.FP16, + core.VarDesc.VarType.BF16, + ]: + var_tmp = _C_ops.cast(out_var, var.dtype) + var_tmp._share_underline_tensor_to(var) + else: + out_var._share_underline_tensor_to(var) + return None + else: + op = block.append_op( + type='assign_value', + outputs={'Out': out_var}, + attrs={ + 'dtype': out_dtype, + 'shape': list(self._value.shape), + value_name: values, + }, + stop_gradient=True, + ) + + if var.dtype in [ + core.VarDesc.VarType.FP16, + core.VarDesc.VarType.BF16, + ]: + block.append_op( + type="cast", + inputs={"X": out_var}, + outputs={"Out": var}, + attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}, + ) + + var.op = op + return op + + class Assign(NumpyArrayInitializer): """Init an parameter with a numpy array, list, or tensor. Args: value (Tensor|numpy.ndarray|list|tuple): numpy array, list, tuple, or tensor to initialize the parameter. - name(str, optional): The default value is None. Normally there is no need for user to set this - property. For more information, please refer to :ref:`api_guide_Name`. + name(str, optional): Normally there is no need for user to set this + property. For more information, please refer to :ref:`api_guide_Name`. Default is None. Returns: A parameter initialized by the input numpy array, list, or tensor. diff --git a/python/paddle/nn/initializer/constant.py b/python/paddle/nn/initializer/constant.py index 637ae62990..0016467f11 100644 --- a/python/paddle/nn/initializer/constant.py +++ b/python/paddle/nn/initializer/constant.py @@ -12,12 +12,75 @@ # See the License for the specific language governing permissions and # limitations under the License. +from paddle import _C_ops + +from ...fluid import core, framework +from ...fluid.framework import _current_expected_place, in_dygraph_mode + # TODO: define the initializers of Constant in neural network -from ...fluid.initializer import ConstantInitializer +from .initializer import Initializer __all__ = [] +class ConstantInitializer(Initializer): + """Implements the constant initializer + + Args: + value (float32, optional): constant value to initialize the variable. Default: 0.0. 
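# A minimal sketch, assuming a paddle 2.x dygraph session, of the public
# Assign and Constant aliases that wrap the two initializers in this hunk.
# The layer type and shapes below are illustrative, not taken from the diff.
import numpy as np
import paddle

w = np.full((4, 3), 0.5, dtype='float32')
fc1 = paddle.nn.Linear(
    4, 3,
    weight_attr=paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(w)),
)
fc2 = paddle.nn.Linear(
    4, 3,
    weight_attr=paddle.ParamAttr(
        initializer=paddle.nn.initializer.Constant(value=2.0)
    ),
)
print(fc1.weight.numpy())  # every entry is 0.5, copied from `w`
print(fc2.weight.numpy())  # every entry is 2.0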
+ + """ + + def __init__(self, value=0.0, force_cpu=False): + assert value is not None + super().__init__() + self._value = value + self._force_cpu = force_cpu + + def forward(self, var, block=None): + """Initialize the input tensor with constant. + + Args: + var(Tensor): Tensor that needs to be initialized. + block(Block, optional): The block in which initialization ops + should be added. Used in static graph only, default None. + + Returns: + The initialization op + """ + block = self._check_block(block) + + assert isinstance(var, framework.Variable) or isinstance( + var, framework.EagerParamBase + ) + assert isinstance(block, framework.Block) + + if in_dygraph_mode(): + place = _current_expected_place() + if self._force_cpu: + place = core.CPUPlace() + _C_ops.full_( + var, var.shape, str(float(self._value)), var.dtype, place + ) + return None + else: + op = block.append_op( + type="fill_constant", + outputs={"Out": var}, + attrs={ + "shape": var.shape, + "dtype": int(var.dtype), + "value": float(self._value), + 'str_value': str(float(self._value)), + 'force_cpu': self._force_cpu, + }, + stop_gradient=True, + ) + + var.op = op + return op + + class Constant(ConstantInitializer): """Implement the constant initializer. diff --git a/python/paddle/nn/initializer/dirac.py b/python/paddle/nn/initializer/dirac.py index 0917859415..3abcc300bc 100644 --- a/python/paddle/nn/initializer/dirac.py +++ b/python/paddle/nn/initializer/dirac.py @@ -20,7 +20,7 @@ from ...fluid import framework from ...fluid.core import VarDesc from ...fluid.data_feeder import check_variable_and_dtype from ...fluid.framework import _current_expected_place -from ...fluid.initializer import Initializer +from .initializer import Initializer __all__ = [] diff --git a/python/paddle/nn/initializer/initializer.py b/python/paddle/nn/initializer/initializer.py new file mode 100644 index 0000000000..c320fa68cd --- /dev/null +++ b/python/paddle/nn/initializer/initializer.py @@ -0,0 +1,159 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +import math + +import numpy as np + +from ...fluid.framework import default_main_program, in_dygraph_mode +from ...fluid.lazy_init import lazy_init_helper + +__all__ = [] + + +class Initializer: + """Base class for parameter initializers + + Defines the common interface of parameter initializers. + They add operations to the init program that are used + to initialize parameter. Users should not use this class + directly, but need to use one of its implementations. 
+ """ + + def __init__(self): + pass + + def __call__(self, param, block=None): + if not lazy_init_helper().state: + return self.forward(param, block) + + return self._lazy_init(param, block) + + def forward(self, param, block=None): + """Add corresponding initialization operations to the network""" + raise NotImplementedError() + + def _lazy_init(self, param, block=None): + """ + Apply lazy initialization + """ + assert in_dygraph_mode() + + def init_op_creator(forward, param, block): + new_var = param._to_static_var(True, block=block) + # Record initializer operator + with lazy_init_helper(): + forward(new_var, block) + + # Add hook function for initializing param in dygraph mode + param.set_init_func(functools.partial(self.forward, param, block)) + param._init_op_creator = functools.partial( + init_op_creator, self.forward, param + ) + + return param + + def _check_block(self, block): + if block is None: + block = default_main_program().global_block() + + return block + + def _compute_fans(self, var): + """Compute the fan_in and the fan_out for layers + + This method computes the fan_in and the fan_out + for neural network layers, if not specified. It is + not possible to perfectly estimate fan_in and fan_out. + This method will estimate it correctly for matrix multiply and + convolutions. + + Args: + var: variable for which fan_in and fan_out have to be computed + + Returns: + tuple of two integers (fan_in, fan_out) + """ + shape = var.shape + if not shape or len(shape) == 0: + fan_in = fan_out = 1 + elif len(shape) == 1: + fan_in = fan_out = shape[0] + elif len(shape) == 2: + # This is the case for simple matrix multiply + fan_in = shape[0] + fan_out = shape[1] + else: + # Assume this to be a convolutional kernel + # In PaddlePaddle, the shape of the kernel is like: + # [num_filters, num_filter_channels, ...] where the remaining + # dimensions are the filter_size + receptive_field_size = np.prod(shape[2:]) + fan_in = shape[1] * receptive_field_size + fan_out = shape[0] * receptive_field_size + + return (fan_in, fan_out) + + +def calculate_gain(nonlinearity, param=None): + """ + Get the recommended ``gain`` value of some nonlinearity function. ``gain`` value can be used in some + ``paddle.nn.initializer`` api to adjust the initialization value. + + Args: + nonlinearity(str): name of nonlinearity activation function. If it is a linear function, such as: + `linear/conv1d/conv2d/conv3d/conv1d_transpose/conv2d_transpose/conv3d_transpose` , 1.0 will be returned. + param(bool|int|float, optional): optional parameter for somme nonlinearity function. Now, it only applies to + 'leaky_relu'. Default: None, it will be calculated as 0.01 in the formula. + + Returns: + A float value, which is the recommended gain for this nonlinearity function. + + Examples: + .. 
code-block:: python + + import paddle + gain = paddle.nn.initializer.calculate_gain('tanh') # 5.0 / 3 + gain = paddle.nn.initializer.calculate_gain('leaky_relu', param=1.0) # 1.0 = math.sqrt(2.0 / (1+param^2)) + initializer = paddle.nn.initializer.Orthogonal(gain) + + """ + if param is None: + param = 0.01 + else: + assert isinstance(param, (bool, int, float)) + param = float(param) + recommended_gain = { + 'sigmoid': 1, + 'linear': 1, + 'conv1d': 1, + 'conv2d': 1, + 'conv3d': 1, + 'conv1d_transpose': 1, + 'conv2d_transpose': 1, + 'conv3d_transpose': 1, + 'tanh': 5.0 / 3, + 'relu': math.sqrt(2.0), + 'leaky_relu': math.sqrt(2.0 / (1 + param**2)), + 'selu': 3.0 / 4, + } + if nonlinearity in recommended_gain.keys(): + return recommended_gain[nonlinearity] + else: + raise ValueError( + "nonlinearity function {} is not suppported now.".format( + nonlinearity + ) + ) diff --git a/python/paddle/nn/initializer/kaiming.py b/python/paddle/nn/initializer/kaiming.py index f214e46fa4..c3a8732315 100644 --- a/python/paddle/nn/initializer/kaiming.py +++ b/python/paddle/nn/initializer/kaiming.py @@ -13,11 +13,185 @@ # limitations under the License. # TODO: define the initializers of Kaiming functions in neural network -from ...fluid.initializer import MSRAInitializer +import math + +from paddle import _C_ops + +from ...fluid import core, framework, unique_name +from ...fluid.framework import _current_expected_place, in_dygraph_mode +from .initializer import Initializer, calculate_gain __all__ = [] +class MSRAInitializer(Initializer): + r"""Implements the MSRA initializer a.k.a. Kaiming Initializer + + This class implements the weight initialization from the paper + `Delving Deep into Rectifiers: Surpassing Human-Level Performance on + ImageNet Classification `_ + by Kaiming He, Xiangyu Zhang, Shaoqing Ren and Jian Sun. This is a + robust initialization method that particularly considers the rectifier + nonlinearities. In case of Uniform distribution, the range is [-x, x], where + + .. math:: + + x = gain \times \sqrt{\frac{3}{fan\_in}} + + In case of Normal distribution, the mean is 0 and the standard deviation + is + + .. math:: + + \frac{gain}{\sqrt{{fan\_in}}} + + Args: + uniform (bool, optional): whether to use uniform or normal distribution. Default is True. + fan_in (float32|None, optional): fan_in (in_features) of trainable Tensor, If None, it will be infered automaticly. If you don't want to use in_features of the Tensor, you can set the value of 'fan_in' smartly by yourself. Default is None. + seed (int32, optional): random seed. Default is 0. + negative_slope (float, optional): negative_slope (only used with leaky_relu). Default is 0.0. + nonlinearity(str, optional): the non-linear function. Default is relu. + + Note: + It is recommended to set fan_in to None for most cases. + + """ + + def __init__( + self, + uniform=True, + fan_in=None, + seed=0, + negative_slope=0, + nonlinearity='relu', + ): + """Constructor for MSRAInitializer""" + assert uniform is not None + assert seed is not None + super().__init__() + self._uniform = uniform + self._fan_in = fan_in + self._seed = seed + self._negative_slope = negative_slope + self._nonlinearity = nonlinearity + + def forward(self, var, block=None): + """Initialize the input tensor with MSRA initialization. + + Args: + var(Tensor): Tensor that needs to be initialized. + block(Block, optional): The block in which initialization ops + should be added. Used in static graph only, default None. 
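# A minimal sketch, assuming a paddle 2.x dygraph session, of the public
# KaimingNormal / KaimingUniform aliases built on this class. fan_in is left
# as None so it is inferred from the parameter shape, as recommended above;
# the layer types and sizes are illustrative only.
import paddle

conv = paddle.nn.Conv2D(
    in_channels=3,
    out_channels=8,
    kernel_size=3,
    weight_attr=paddle.ParamAttr(
        initializer=paddle.nn.initializer.KaimingNormal()
    ),
)
fc = paddle.nn.Linear(
    16, 4,
    weight_attr=paddle.ParamAttr(
        initializer=paddle.nn.initializer.KaimingUniform()
    ),
)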
+ + Returns: + The initialization op + """ + block = self._check_block(block) + + assert isinstance(var, framework.Variable) + assert isinstance(block, framework.Block) + f_in, f_out = self._compute_fans(var) + + # If fan_in is passed, use it + fan_in = f_in if self._fan_in is None else self._fan_in + + if self._seed == 0: + self._seed = block.program.random_seed + + # to be compatible of fp16 initalizers + if var.dtype == core.VarDesc.VarType.FP16 or ( + var.dtype == core.VarDesc.VarType.BF16 and not self._uniform + ): + out_dtype = core.VarDesc.VarType.FP32 + out_var = block.create_var( + name=unique_name.generate( + ".".join(['masra_init', var.name, 'tmp']) + ), + shape=var.shape, + dtype=out_dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + ) + else: + out_dtype = var.dtype + out_var = var + + if in_dygraph_mode(): + if self._uniform: + gain = calculate_gain(self._nonlinearity, self._negative_slope) + limit = gain * math.sqrt(3.0 / float(fan_in)) + out_var = _C_ops.uniform( + var.shape, + out_dtype, + -limit, + limit, + self._seed, + _current_expected_place(), + ) + else: + gain = calculate_gain(self._nonlinearity, self._negative_slope) + std = gain / math.sqrt(float(fan_in)) + place = _current_expected_place() + out_var = _C_ops.gaussian( + out_var.shape, 0.0, std, self._seed, out_dtype, place + ) + + if var.dtype == core.VarDesc.VarType.FP16 or ( + var.dtype == core.VarDesc.VarType.BF16 and not self._uniform + ): + var_tmp = _C_ops.cast(out_var, var.dtype) + var_tmp._share_underline_tensor_to(var) + else: + out_var._share_underline_tensor_to(var) + return None + else: + if self._uniform: + gain = calculate_gain(self._nonlinearity, self._negative_slope) + limit = gain * math.sqrt(3.0 / float(fan_in)) + op = block.append_op( + type="uniform_random", + inputs={}, + outputs={"Out": out_var}, + attrs={ + "shape": out_var.shape, + "dtype": int(out_dtype), + "min": -limit, + "max": limit, + "seed": self._seed, + }, + stop_gradient=True, + ) + + else: + gain = calculate_gain(self._nonlinearity, self._negative_slope) + std = gain / math.sqrt(float(fan_in)) + op = block.append_op( + type="gaussian_random", + outputs={"Out": out_var}, + attrs={ + "shape": out_var.shape, + "dtype": int(out_dtype), + "mean": 0.0, + "std": std, + "seed": self._seed, + }, + stop_gradient=True, + ) + + if var.dtype == core.VarDesc.VarType.FP16 or ( + var.dtype == core.VarDesc.VarType.BF16 and not self._uniform + ): + block.append_op( + type="cast", + inputs={"X": out_var}, + outputs={"Out": var}, + attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}, + ) + + var.op = op + return op + + class KaimingNormal(MSRAInitializer): r"""Implements the Kaiming Normal initializer @@ -36,9 +210,9 @@ class KaimingNormal(MSRAInitializer): \frac{gain}{\sqrt{{fan\_in}}} Args: - fan_in (float32|None, optional): fan_in (in_features) of trainable Tensor, If None, it will be infered automaticly. If you don't want to use in_features of the Tensor, you can set the value of 'fan_in' smartly by yourself. default is None. - negative_slope (float, optional): negative_slope (only used with leaky_relu). default is 0.0. - nonlinearity(str, optional): the non-linear function. default is relu. + fan_in (float32|None, optional): fan_in (in_features) of trainable Tensor, If None, it will be infered automaticly. If you don't want to use in_features of the Tensor, you can set the value of 'fan_in' smartly by yourself. Default is None. + negative_slope (float, optional): negative_slope (only used with leaky_relu). 
Default is 0.0. + nonlinearity(str, optional): the non-linear function. Default is relu. Note: It is recommended to set fan_in to None for most cases. @@ -84,9 +258,9 @@ class KaimingUniform(MSRAInitializer): x = gain \times \sqrt{\frac{3}{fan\_in}} Args: - fan_in (float32|None, optional): fan_in (in_features) of trainable Tensor, If None, it will be infered automaticly. If you don't want to use in_features of the Tensor, you can set the value of 'fan_in' smartly by yourself. default is None. - negative_slope (float, optional): negative_slope (only used with leaky_relu). default is 0.0. - nonlinearity(str, optional): the non-linear function. default is relu. + fan_in (float32|None, optional): fan_in (in_features) of trainable Tensor, If None, it will be infered automaticly. If you don't want to use in_features of the Tensor, you can set the value of 'fan_in' smartly by yourself. Default is None. + negative_slope (float, optional): negative_slope (only used with leaky_relu). Default is 0.0. + nonlinearity(str, optional): the non-linear function. Default is relu. Note: It is recommended to set fan_in to None for most cases. diff --git a/python/paddle/nn/initializer/normal.py b/python/paddle/nn/initializer/normal.py index 5ead30f4f1..030ec95940 100644 --- a/python/paddle/nn/initializer/normal.py +++ b/python/paddle/nn/initializer/normal.py @@ -12,19 +12,99 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ...fluid.initializer import NormalInitializer, TruncatedNormalInitializer +from paddle import _C_ops + +from ...fluid import core, framework, unique_name +from ...fluid.data_feeder import check_variable_and_dtype +from ...fluid.framework import _current_expected_place, in_dygraph_mode +from .initializer import Initializer __all__ = [] +class NormalInitializer(Initializer): + """Implements the Random Normal(Gaussian) distribution initializer + + Args: + loc (float, optional): mean of the normal distribution. Default is 0.0. + scale (float, optional): standard deviation of the normal distribution. Default is 1.0. + seed (int, optional): random seed. Default is 0. + + """ + + def __init__(self, loc=0.0, scale=1.0, seed=0): + assert loc is not None + assert scale is not None + assert seed is not None + super().__init__() + self._mean = loc + self._std_dev = scale + self._seed = seed + + def forward(self, var, block=None): + """Initialize the input tensor with Normal distribution. + + Args: + var(Tensor): Tensor that needs to be initialized. + block(Block, optional): The block in which initialization ops + should be added. Used in static graph only, default None. 
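# A minimal sketch, assuming a paddle 2.x dygraph session, of the public
# Normal alias over this class; mean/std map onto loc/scale above. The
# embedding sizes are illustrative only.
import paddle

emb = paddle.nn.Embedding(
    1000, 64,
    weight_attr=paddle.ParamAttr(
        initializer=paddle.nn.initializer.Normal(mean=0.0, std=0.02)
    ),
)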
+ + Returns: + The initialization op + """ + block = self._check_block(block) + + assert isinstance(block, framework.Block) + + check_variable_and_dtype( + var, + "Out", + ["uint16", "float16", "float32", "float64"], + "guassian_random", + ) + + if self._seed == 0: + self._seed = block.program.random_seed + + if in_dygraph_mode(): + place = _current_expected_place() + out_var = _C_ops.gaussian( + var.shape, + self._mean, + self._std_dev, + self._seed, + var.dtype, + place, + ) + out_var._share_underline_tensor_to(var) + return None + + else: + op = block.append_op( + type="gaussian_random", + outputs={"Out": var}, + attrs={ + "shape": var.shape, + "dtype": var.dtype, + "mean": self._mean, + "std": self._std_dev, + "seed": self._seed, + "use_mkldnn": False, + }, + stop_gradient=True, + ) + var.op = op + return op + + class Normal(NormalInitializer): """The Random Normal (Gaussian) distribution initializer. Args: - mean (float, optional): mean of the normal distribution. The default value is 0.0. - std (float, optional): standard deviation of the normal distribution. The default value is 1.0. + mean (float, optional): mean of the normal distribution. Default is 0.0. + std (float, optional): standard deviation of the normal distribution. Default is 1.0. name(str, optional): The default value is None. Normally there is no need for user to set this - property. For more information, please refer to :ref:`api_guide_Name`. + property. For more information, please refer to :ref:`api_guide_Name`. Default: None. Returns: A parameter initialized by Random Normal (Gaussian) distribution. @@ -58,12 +138,113 @@ class Normal(NormalInitializer): super().__init__(loc=mean, scale=std, seed=0) +class TruncatedNormalInitializer(Initializer): + """Implements the Random TruncatedNormal(Gaussian) distribution initializer + + Args: + loc (float, optional): Mean of the normal distribution. Default is :math:`0.0`. + scale (float, optional): Standard deviation of the normal distribution. Default is :math:`1.0`. + seed (int, optional): random seed. Default is 0. + + """ + + def __init__(self, loc=0.0, scale=1.0, seed=0): + assert loc is not None + assert scale is not None + assert seed is not None + super().__init__() + self._mean = loc + self._std_dev = scale + self._seed = seed + + def forward(self, var, block=None): + """Initialize the input tensor with TruncatedNormal distribution. + + Args: + var(Tensor): Tensor that needs to be initialized. + block(Block, optional): The block in which initialization ops + should be added. Used in static graph only, default None. 
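# A minimal sketch, assuming a paddle 2.x dygraph session, of the public
# TruncatedNormal alias over this class; out-of-range draws are handled by
# the underlying truncated_gaussian_random op. Sizes are illustrative only.
import paddle

linear = paddle.nn.Linear(
    10, 10,
    weight_attr=paddle.ParamAttr(
        initializer=paddle.nn.initializer.TruncatedNormal(mean=0.0, std=2.0)
    ),
)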
+ + Returns: + The initialization op + """ + block = self._check_block(block) + + assert isinstance(var, framework.Variable) + assert isinstance(block, framework.Block) + + if self._seed == 0: + self._seed = block.program.random_seed + + # to be compatible of fp16 initalizers + if var.dtype in [core.VarDesc.VarType.FP16, core.VarDesc.VarType.BF16]: + out_dtype = core.VarDesc.VarType.FP32 + out_var = block.create_var( + name=unique_name.generate( + ".".join(['truncated_gaussian_random', var.name, 'tmp']) + ), + shape=var.shape, + dtype=out_dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + ) + else: + out_dtype = var.dtype + out_var = var + + if in_dygraph_mode(): + out_var = _C_ops.truncated_gaussian_random( + var.shape, + self._mean, + self._std_dev, + self._seed, + out_dtype, + _current_expected_place(), + ) + if var.dtype in [ + core.VarDesc.VarType.FP16, + core.VarDesc.VarType.BF16, + ]: + var_tmp = _C_ops.cast(out_var, var.dtype) + var_tmp._share_underline_tensor_to(var) + else: + out_var._share_underline_tensor_to(var) + return None + + else: + op = block.append_op( + type="truncated_gaussian_random", + outputs={"Out": out_var}, + attrs={ + "shape": var.shape, + "dtype": out_dtype, + "mean": self._mean, + "std": self._std_dev, + "seed": self._seed, + }, + stop_gradient=True, + ) + + if var.dtype in [ + core.VarDesc.VarType.FP16, + core.VarDesc.VarType.BF16, + ]: + block.append_op( + type="cast", + inputs={"X": out_var}, + outputs={"Out": var}, + attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}, + ) + var.op = op + return op + + class TruncatedNormal(TruncatedNormalInitializer): """The truncated normal distribution (Gaussian distribution) initializer. Args: - mean (float, optional): Mean of the normal distribution. The default value is :math:`0.0`. - std (float, optional): Standard deviation of the normal distribution. The default value is :math:`1.0`. + mean (float, optional): Mean of the normal distribution. Default is :math:`0.0`. + std (float, optional): Standard deviation of the normal distribution. Default is :math:`1.0`. name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None. Returns: diff --git a/python/paddle/nn/initializer/orthogonal.py b/python/paddle/nn/initializer/orthogonal.py index 0bbfd9eaaa..65a496f2b1 100644 --- a/python/paddle/nn/initializer/orthogonal.py +++ b/python/paddle/nn/initializer/orthogonal.py @@ -18,7 +18,7 @@ from paddle.utils import unique_name from ...fluid import framework from ...fluid.data_feeder import check_variable_and_dtype from ...fluid.dygraph import no_grad -from ...fluid.initializer import Initializer +from .initializer import Initializer __all__ = [] diff --git a/python/paddle/nn/initializer/uniform.py b/python/paddle/nn/initializer/uniform.py index 011cb6eff6..cd64a15b75 100644 --- a/python/paddle/nn/initializer/uniform.py +++ b/python/paddle/nn/initializer/uniform.py @@ -12,17 +12,144 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ...fluid.initializer import UniformInitializer +from paddle import _C_ops + +from ...fluid import core, framework, unique_name +from ...fluid.data_feeder import check_variable_and_dtype +from ...fluid.framework import _current_expected_place, in_dygraph_mode +from .initializer import Initializer __all__ = [] +class UniformInitializer(Initializer): + """Implements the random uniform distribution initializer + + Args: + low (float, optional): Lower boundary of the uniform distribution. Default is :math:`-1.0`. + high (float, optional): Upper boundary of the uniform distribution. Default is :math:`1.0`. + seed (int, optional): Random seed. Default is 0. + diag_num (int, optional): the number of diagonal elements to initialize. + If set to 0, diagonal initialization will be not performed. Default is 0. + diag_step (int, optional): Step size between two diagonal elements, + which is generally the width of the square matrix. Default is 0. + diag_val (float, optional): the value of the diagonal element to be initialized, + default 1.0. It takes effect only if the diag_num is greater than 0. Default is :math:`1.0`. + + """ + + def __init__( + self, low=-1.0, high=1.0, seed=0, diag_num=0, diag_step=0, diag_val=1.0 + ): + assert low is not None + assert high is not None + assert high >= low + assert seed is not None + assert diag_num is not None + assert diag_step is not None + assert diag_val is not None + if diag_num > 0 or diag_step > 0: + assert diag_num > 0 and diag_step > 0 + super().__init__() + self._low = low + self._high = high + self._seed = seed + self._diag_num = diag_num + self._diag_step = diag_step + self._diag_val = diag_val + + def forward(self, var, block=None): + """Initialize the input tensor with Uniform distribution. + + Args: + var(Tensor): Tensor that needs to be initialized. + block(Block, optional): The block in which initialization ops + should be added. Used in static graph only, default None. 
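# A minimal sketch, assuming a paddle 2.x dygraph session, of the public
# Uniform alias over this class; the diag_* options keep their defaults,
# so no diagonal values are written. Sizes are illustrative only.
import paddle

linear = paddle.nn.Linear(
    8, 8,
    weight_attr=paddle.ParamAttr(
        initializer=paddle.nn.initializer.Uniform(low=-0.5, high=0.5)
    ),
)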
+ + Returns: + The initialization op + """ + block = self._check_block(block) + + assert isinstance(block, framework.Block) + if not in_dygraph_mode(): + check_variable_and_dtype( + var, + "Out", + ["uint16", "float16", "float32", "float64"], + "uniform_random", + ) + + if self._seed == 0: + self._seed = block.program.random_seed + + # to be compatible of fp16 initializers + if var.dtype == core.VarDesc.VarType.FP16: + out_dtype = core.VarDesc.VarType.FP32 + out_var = block.create_var( + name=unique_name.generate( + ".".join(['uniform_random', var.name, 'tmp']) + ), + shape=var.shape, + dtype=out_dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + ) + else: + out_dtype = var.dtype + out_var = var + + if in_dygraph_mode(): + out_var = _C_ops.uniform( + var.shape, + out_dtype, + self._low, + self._high, + self._seed, + _current_expected_place(), + ) + if var.dtype == core.VarDesc.VarType.FP16: + var_tmp = _C_ops.cast(out_var, var.dtype) + var_tmp._share_underline_tensor_to(var) + else: + out_var._share_underline_tensor_to(var) + return None + else: + op = block.append_op( + type="uniform_random", + inputs={}, + outputs={"Out": out_var}, + attrs={ + "shape": var.shape, + "dtype": out_dtype, + "min": self._low, + "max": self._high, + "seed": self._seed, + "diag_num": self._diag_num, + "diag_step": self._diag_step, + "diag_val": self._diag_val, + }, + stop_gradient=True, + ) + + if var.dtype == core.VarDesc.VarType.FP16: + block.append_op( + type="cast", + inputs={"X": out_var}, + outputs={"Out": var}, + attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}, + ) + + var.op = op + return op + + class Uniform(UniformInitializer): """The uniform distribution initializer. Args: - low (float, optional): Lower boundary of the uniform distribution. The default value is :math:`-1.0`. - high (float, optional): Upper boundary of the uniform distribution. The default value is :math:`1.0`. + low (float, optional): Lower boundary of the uniform distribution. Default is :math:`-1.0`. + high (float, optional): Upper boundary of the uniform distribution. Default is :math:`1.0`. name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None. Returns: diff --git a/python/paddle/nn/initializer/xavier.py b/python/paddle/nn/initializer/xavier.py index 35e104edba..6d17c029f5 100644 --- a/python/paddle/nn/initializer/xavier.py +++ b/python/paddle/nn/initializer/xavier.py @@ -12,11 +12,183 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ...fluid.initializer import XavierInitializer +import math + +from paddle import _C_ops + +from ...fluid import core, framework, unique_name +from ...fluid.data_feeder import check_variable_and_dtype +from ...fluid.framework import _current_expected_place, in_dygraph_mode +from .initializer import Initializer __all__ = [] +class XavierInitializer(Initializer): + r""" + This class implements the Xavier weight initializer from the paper + `Understanding the difficulty of training deep feedforward neural + networks `_ + by Xavier Glorot and Yoshua Bengio. + + This initializer is designed to keep the scale of the gradients + approximately same in all the layers. In case of Uniform distribution, + the range is [-x, x], where + + .. math:: + + x = \sqrt{\\frac{6.0}{fan\_in + fan\_out}} + + In case of Normal distribution, the mean is 0 and the standard deviation + is + + .. 
math:: + + \sqrt{\\frac{2.0}{fan\_in + fan\_out}} + + + Args: + uniform (bool, optional): whether to use uniform ,if False use normal distribution. Default is True. + fan_in (float, optional): fan_in for Xavier initialization. If None, it is + inferred from the variable. Default is None. + fan_out (float, optional): fan_out for Xavier initialization. If None, it is + inferred from the variable. Default is None. + seed (int, optional): Random seed. Default is 0. + + Note: + It is recommended to set fan_in and fan_out to None for most cases. + + """ + + def __init__(self, uniform=True, fan_in=None, fan_out=None, seed=0): + assert uniform is not None + assert seed is not None + super().__init__() + self._uniform = uniform + self._fan_in = fan_in + self._fan_out = fan_out + self._seed = seed + + def forward(self, var, block=None): + """Initialize the input tensor with Xavier initialization. + + Args: + var(Tensor): Tensor that needs to be initialized. + block(Block, optional): The block in which initialization ops + should be added. Used in static graph only, default None. + + Returns: + The initialization op + """ + block = self._check_block(block) + + assert isinstance(block, framework.Block) + check_variable_and_dtype( + var, + "Out", + ["uint16", "float16", "float32", "float64"], + "xavier_init", + ) + + f_in, f_out = self._compute_fans(var) + + # If fan_in and fan_out are passed, use them + fan_in = f_in if self._fan_in is None else self._fan_in + fan_out = f_out if self._fan_out is None else self._fan_out + + if self._seed == 0: + self._seed = block.program.random_seed + + # to be compatible of fp16 initalizers + if var.dtype == core.VarDesc.VarType.FP16 or ( + var.dtype == core.VarDesc.VarType.BF16 and not self._uniform + ): + out_dtype = core.VarDesc.VarType.FP32 + out_var = block.create_var( + name=unique_name.generate( + ".".join(['xavier_init', var.name, 'tmp']) + ), + shape=var.shape, + dtype=out_dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + ) + else: + out_dtype = var.dtype + out_var = var + + if in_dygraph_mode(): + if self._uniform: + limit = math.sqrt(6.0 / float(fan_in + fan_out)) + out_var = _C_ops.uniform( + out_var.shape, + out_dtype, + -limit, + limit, + self._seed, + _current_expected_place(), + ) + else: + std = math.sqrt(2.0 / float(fan_in + fan_out)) + + place = _current_expected_place() + out_var = _C_ops.gaussian( + out_var.shape, 0.0, std, self._seed, out_dtype, place + ) + + if var.dtype == core.VarDesc.VarType.FP16 or ( + var.dtype == core.VarDesc.VarType.BF16 and not self._uniform + ): + var_tmp = _C_ops.cast(out_var, var.dtype) + var_tmp._share_underline_tensor_to(var) + else: + out_var._share_underline_tensor_to(var) + return None + else: + if self._uniform: + limit = math.sqrt(6.0 / float(fan_in + fan_out)) + op = block.append_op( + type="uniform_random", + inputs={}, + outputs={"Out": out_var}, + attrs={ + "shape": out_var.shape, + "dtype": out_dtype, + "min": -limit, + "max": limit, + "seed": self._seed, + }, + stop_gradient=True, + ) + else: + std = math.sqrt(2.0 / float(fan_in + fan_out)) + op = block.append_op( + type="gaussian_random", + outputs={"Out": out_var}, + attrs={ + "shape": out_var.shape, + "dtype": out_var.dtype, + "mean": 0.0, + "std": std, + "seed": self._seed, + }, + stop_gradient=True, + ) + + if var.dtype == core.VarDesc.VarType.FP16 or ( + var.dtype == core.VarDesc.VarType.BF16 and not self._uniform + ): + block.append_op( + type="cast", + inputs={"X": out_var}, + outputs={"Out": var}, + attrs={"in_dtype": 
out_var.dtype, "out_dtype": var.dtype}, + ) + + var.op = op + return op + + class XavierNormal(XavierInitializer): r""" This class implements the Xavier weight initializer from the paper @@ -31,9 +203,9 @@ class XavierNormal(XavierInitializer): Args: fan_in (float, optional): fan_in for Xavier initialization, which is - inferred from the Tensor. The default value is None. + inferred from the Tensor. Default is None. fan_out (float, optional): fan_out for Xavier initialization, which is - inferred from the Tensor. The default value is None. + inferred from the Tensor. Default is None. name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None. Returns: @@ -83,9 +255,9 @@ class XavierUniform(XavierInitializer): Args: fan_in (float, optional): fan_in for Xavier initialization, which is - inferred from the Tensor. The default value is None. + inferred from the Tensor. Default is None. fan_out (float, optional): fan_out for Xavier initialization, which is - inferred from the Tensor. The default value is None. + inferred from the Tensor. Default is None. name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None. Returns: diff --git a/python/paddle/nn/layer/rnn.py b/python/paddle/nn/layer/rnn.py index 2617c76ae6..4bf31ca30e 100644 --- a/python/paddle/nn/layer/rnn.py +++ b/python/paddle/nn/layer/rnn.py @@ -20,15 +20,20 @@ import numpy as np import paddle from paddle import _C_ops, _legacy_C_ops, framework, in_dynamic_mode +from paddle.common_ops_import import Variable from paddle.fluid.data_feeder import check_type, check_variable_and_dtype -from paddle.fluid.framework import _non_static_mode, in_dygraph_mode +from paddle.fluid.framework import ( + _non_static_mode, + default_startup_program, + in_dygraph_mode, + program_guard, +) from paddle.fluid.layers import control_flow, sequence_lod, utils from paddle.fluid.layers.utils import flatten, map_structure from paddle.framework import core from paddle.nn import Layer from paddle.nn import functional as F from paddle.nn import initializer as I -from paddle.static import Variable, default_startup_program, program_guard from paddle.tensor.manipulation import tensor_array_to_tensor from .container import LayerList diff --git a/python/paddle/optimizer/optimizer.py b/python/paddle/optimizer/optimizer.py index cad226952b..d9e1cd4560 100644 --- a/python/paddle/optimizer/optimizer.py +++ b/python/paddle/optimizer/optimizer.py @@ -34,7 +34,6 @@ from paddle.fluid.framework import ( from ..fluid import framework, unique_name from ..fluid.backward import _get_no_grad_set_name, append_backward from ..fluid.framework import Parameter, program_guard -from ..fluid.initializer import Constant from ..fluid.layer_helper import LayerHelper from .lr import LRScheduler @@ -453,7 +452,8 @@ class Optimizer: lr_value = float(self._learning_rate()) self.helper.set_variable_initializer( - lr_var, initializer=Constant(value=lr_value) + lr_var, + initializer=paddle.nn.initializer.Constant(value=lr_value), ) elif isinstance(self._learning_rate, float): # only create global lr_var once @@ -726,7 +726,10 @@ class Optimizer: else: with device_guard(device): self.helper.set_variable_initializer( - var, initializer=Constant(value=float(fill_value)) + var, + initializer=paddle.nn.initializer.Constant( + value=float(fill_value) + ), ) if framework._non_static_mode(): diff --git a/python/paddle/static/nn/common.py b/python/paddle/static/nn/common.py index 
1581f29921..ef49b5642a 100644 --- a/python/paddle/static/nn/common.py +++ b/python/paddle/static/nn/common.py @@ -28,9 +28,9 @@ from paddle.common_ops_import import ( from paddle.fluid import core from paddle.fluid.data_feeder import check_dtype from paddle.fluid.framework import Variable, _non_static_mode, static_only -from paddle.fluid.initializer import Constant, Normal from paddle.fluid.layers.layer_function_generator import templatedoc from paddle.fluid.param_attr import ParamAttr +from paddle.nn.initializer import Constant, Normal __all__ = [] @@ -1012,7 +1012,7 @@ def conv2d( "filter size.".format(filter_elem_num) ) std = (2.0 / filter_elem_num) ** 0.5 - return Normal(0.0, std, 0) + return Normal(0.0, std) filter_param = helper.create_parameter( attr=helper.param_attr, @@ -1315,7 +1315,7 @@ def conv3d( ) std = (2.0 / filter_elem_num) ** 0.5 - return Normal(0.0, std, 0) + return Normal(0.0, std) filter_param = helper.create_parameter( attr=helper.param_attr, @@ -2286,7 +2286,7 @@ def deformable_conv( "filter size.".format(filter_elem_num) ) std = (2.0 / filter_elem_num) ** 0.5 - return paddle.nn.initializer.normal.NormalInitializer(0.0, std, 0) + return paddle.nn.initializer.normal.Normal(0.0, std) filter_param = helper.create_parameter( attr=helper.param_attr, @@ -2757,7 +2757,7 @@ def batch_norm( attr=helper.param_attr, shape=param_shape, dtype=dtype, - default_initializer=paddle.fluid.initializer.Constant(1.0), + default_initializer=paddle.nn.initializer.Constant(1.0), ) bias = helper.create_parameter( attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True @@ -2766,7 +2766,7 @@ def batch_norm( mean = helper.create_parameter( attr=paddle.ParamAttr( name=moving_mean_name, - initializer=paddle.fluid.initializer.Constant(0.0), + initializer=paddle.nn.initializer.Constant(0.0), trainable=False, do_model_average=do_model_average_for_mean_and_var, ), @@ -2778,7 +2778,7 @@ def batch_norm( variance = helper.create_parameter( attr=paddle.ParamAttr( name=moving_variance_name, - initializer=paddle.fluid.initializer.Constant(1.0), + initializer=paddle.nn.initializer.Constant(1.0), trainable=False, do_model_average=do_model_average_for_mean_and_var, ), diff --git a/python/paddle/static/nn/loss.py b/python/paddle/static/nn/loss.py index 20c7641e2d..3f464928c2 100644 --- a/python/paddle/static/nn/loss.py +++ b/python/paddle/static/nn/loss.py @@ -16,12 +16,12 @@ import numpy as np from paddle.fluid.framework import static_only -from paddle.fluid.initializer import NumpyArrayInitializer # TODO: define loss functions of neural network from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layers.layer_function_generator import templatedoc from paddle.fluid.param_attr import ParamAttr +from paddle.nn.initializer import Assign from ...fluid.data_feeder import check_variable_and_dtype @@ -209,7 +209,7 @@ def nce( attr=ParamAttr(), shape=numpy_array.shape, dtype=numpy_array.dtype, - default_initializer=NumpyArrayInitializer(numpy_array), + default_initializer=Assign(numpy_array), ) ret.stop_gradient = True return ret diff --git a/python/paddle/static/nn/metric.py b/python/paddle/static/nn/metric.py index 7406525b9d..bcb3cfc130 100644 --- a/python/paddle/static/nn/metric.py +++ b/python/paddle/static/nn/metric.py @@ -18,9 +18,9 @@ All layers just related to metric. 
from paddle import _legacy_C_ops from paddle.fluid.data_feeder import check_variable_and_dtype from paddle.fluid.framework import Variable, _non_static_mode, _varbase_creator -from paddle.fluid.initializer import Constant from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layers import tensor +from paddle.nn.initializer import ConstantInitializer __all__ = [] @@ -266,7 +266,8 @@ def auc( for var in [batch_stat_pos, batch_stat_neg, stat_pos, stat_neg]: helper.set_variable_initializer( - var, Constant(value=0.0, force_cpu=False) + var, + ConstantInitializer(value=0.0, force_cpu=False), ) # "InsTagWeight": [ins_tag_weight] diff --git a/python/paddle/tensor/array.py b/python/paddle/tensor/array.py index 70b606c3c6..84fc94b5ee 100644 --- a/python/paddle/tensor/array.py +++ b/python/paddle/tensor/array.py @@ -14,9 +14,9 @@ # Define functions about array. +from ..common_ops_import import Variable from ..fluid.data_feeder import check_type, check_variable_and_dtype from ..framework import LayerHelper, core, in_dygraph_mode -from ..static import Variable __all__ = [] diff --git a/python/paddle/tensor/attribute.py b/python/paddle/tensor/attribute.py index 37a1aaf3c8..c79c9553c2 100644 --- a/python/paddle/tensor/attribute.py +++ b/python/paddle/tensor/attribute.py @@ -19,10 +19,10 @@ import numpy as np import paddle from paddle import _C_ops +from ..common_ops_import import Variable from ..fluid.data_feeder import check_type, check_variable_and_dtype from ..fluid.framework import in_dygraph_mode from ..framework import LayerHelper, core -from ..static import Variable from .creation import _complex_to_real_dtype, assign __all__ = [] diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index 7523845c2b..808e4d86d6 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -35,7 +35,6 @@ from ..fluid.framework import ( _in_eager_without_dygraph_check, device_guard, ) -from ..fluid.initializer import Constant, Initializer from ..fluid.layers import utils from ..fluid.param_attr import ParamAttr from ..framework import ( @@ -140,7 +139,10 @@ def create_global_var( stop_gradient=True, ) helper.set_variable_initializer( - var, initializer=Constant(value=float(value), force_cpu=force_cpu) + var, + initializer=paddle.nn.initializer.ConstantInitializer( + value=float(value), force_cpu=force_cpu + ), ) return var @@ -214,7 +216,7 @@ def create_parameter( check_type( default_initializer, 'default_initializer', - (type(None), Initializer), + (type(None), paddle.nn.initializer.Initializer), 'create_parameter', ) diff --git a/python/paddle/tensor/layer_function_generator.py b/python/paddle/tensor/layer_function_generator.py index 299e41d2ae..6d9c5fe288 100644 --- a/python/paddle/tensor/layer_function_generator.py +++ b/python/paddle/tensor/layer_function_generator.py @@ -19,6 +19,7 @@ from io import StringIO from paddle import _C_ops, _legacy_C_ops +from ..common_ops_import import Variable from ..fluid.data_feeder import check_variable_and_dtype from ..fluid.proto import framework_pb2 from ..framework import ( @@ -28,7 +29,6 @@ from ..framework import ( core, in_dygraph_mode, ) -from ..static import Variable __all__ = [] diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py index 10c8c24a78..c59202977f 100644 --- a/python/paddle/tensor/linalg.py +++ b/python/paddle/tensor/linalg.py @@ -18,13 +18,13 @@ import paddle from paddle import _C_ops from paddle.common_ops_import import VarDesc +from ..common_ops_import import 
Variable from ..fluid.data_feeder import ( check_dtype, check_type, check_variable_and_dtype, ) from ..framework import LayerHelper, in_dygraph_mode -from ..static import Variable from .creation import full from .logic import logical_not from .manipulation import cast diff --git a/python/paddle/tensor/logic.py b/python/paddle/tensor/logic.py old mode 100755 new mode 100644 index 375f3614e5..ad6c30e319 --- a/python/paddle/tensor/logic.py +++ b/python/paddle/tensor/logic.py @@ -16,9 +16,9 @@ import paddle +from ..common_ops_import import Variable from ..fluid.data_feeder import check_type, check_variable_and_dtype from ..fluid.framework import global_var -from ..static import Variable from .layer_function_generator import templatedoc if global_var._in_eager_mode_: diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index b5308e6cee..b9feee2fe1 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -20,7 +20,7 @@ import paddle from paddle import _C_ops from paddle.utils.inplace_utils import inplace_apis_in_dygraph_only -from ..common_ops_import import fill_constant +from ..common_ops_import import Variable, fill_constant from ..fluid.data_feeder import ( check_dtype, check_type, @@ -35,7 +35,6 @@ from ..framework import ( dygraph_only, in_dygraph_mode, ) -from ..static import Variable from .creation import _complex_to_real_dtype, _real_to_complex_dtype, zeros __all__ = [] diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index 81b092f4c3..6f797b82e1 100644 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -25,6 +25,7 @@ from paddle.common_ops_import import VarDesc, dygraph_only, dygraph_utils # TODO: define math functions from paddle.utils.inplace_utils import inplace_apis_in_dygraph_only +from ..common_ops_import import Variable from ..fluid.data_feeder import ( check_dtype, check_type, @@ -38,7 +39,6 @@ from ..framework import ( core, in_dygraph_mode, ) -from ..static import Variable from .creation import _complex_to_real_dtype from .layer_function_generator import generate_layer_fn, templatedoc from .manipulation import cast diff --git a/python/paddle/tensor/random.py b/python/paddle/tensor/random.py index 59958df236..ff48780423 100644 --- a/python/paddle/tensor/random.py +++ b/python/paddle/tensor/random.py @@ -16,8 +16,8 @@ import paddle from paddle import _C_ops, _legacy_C_ops +from paddle.common_ops_import import Variable from paddle.fluid.framework import _current_expected_place, in_dygraph_mode -from paddle.static import Variable from ..fluid.data_feeder import ( check_dtype, diff --git a/python/paddle/tensor/stat.py b/python/paddle/tensor/stat.py index cc94aee415..f978447839 100644 --- a/python/paddle/tensor/stat.py +++ b/python/paddle/tensor/stat.py @@ -18,9 +18,9 @@ import paddle from paddle import _C_ops, _legacy_C_ops from paddle.fluid.framework import in_dygraph_mode +from ..common_ops_import import Variable from ..fluid.data_feeder import check_type, check_variable_and_dtype from ..framework import LayerHelper, core -from ..static import Variable from .math import _get_reduce_axis_with_tensor from .search import where diff --git a/python/paddle/vision/ops.py b/python/paddle/vision/ops.py index 0d43bd0fc5..2cd582884a 100755 --- a/python/paddle/vision/ops.py +++ b/python/paddle/vision/ops.py @@ -19,11 +19,11 @@ from paddle.tensor.math import _add_with_axis from ..fluid.data_feeder import check_type, check_variable_and_dtype from ..fluid.framework import 
Variable, in_dygraph_mode -from ..fluid.initializer import Normal from ..fluid.layer_helper import LayerHelper from ..fluid.layers import utils from ..framework import _current_expected_place from ..nn import BatchNorm2D, Conv2D, Layer, ReLU, Sequential +from ..nn.initializer import Normal __all__ = [ # noqa 'yolo_loss', @@ -1120,7 +1120,7 @@ class DeformConv2D(Layer): def _get_default_param_initializer(): filter_elem_num = np.prod(self._kernel_size) * self._in_channels std = (2.0 / filter_elem_num) ** 0.5 - return Normal(0.0, std, 0) + return Normal(0.0, std) self.weight = self.create_parameter( shape=filter_shape, -- GitLab
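For readers migrating call sites along with this patch, here is a minimal usage sketch of the paddle.nn.initializer replacements that the diff switches to (Uniform, XavierNormal, Constant, and Assign in place of the removed fluid.initializer classes). It assumes a Paddle 2.x install where paddle.nn.initializer exposes these classes; the layer sizes and initializer values below are illustrative assumptions, not values taken from the patch.

import numpy as np

import paddle
from paddle import nn

# Uniform weights and a constant bias, replacing fluid.initializer.UniformInitializer
# and fluid.initializer.ConstantInitializer (values here are illustrative).
linear = nn.Linear(
    4,
    8,
    weight_attr=paddle.ParamAttr(
        initializer=nn.initializer.Uniform(low=-0.5, high=0.5)
    ),
    bias_attr=paddle.ParamAttr(initializer=nn.initializer.Constant(value=0.1)),
)

# Xavier (Glorot) normal weights, replacing fluid.initializer.XavierInitializer;
# fan_in/fan_out are inferred from the parameter shape when left as None.
conv = nn.Conv2D(
    3,
    16,
    kernel_size=3,
    weight_attr=paddle.ParamAttr(initializer=nn.initializer.XavierNormal()),
)

# Assign replaces fluid.initializer.NumpyArrayInitializer for fixed, precomputed values.
embedding = nn.Embedding(
    10,
    4,
    weight_attr=paddle.ParamAttr(
        initializer=nn.initializer.Assign(
            np.arange(40, dtype="float32").reshape(10, 4)
        )
    ),
)

out = linear(paddle.randn([2, 4]))
print(out.shape)  # [2, 8]

Where an initializer is still referenced by its long class name, the same classes remain importable from paddle.nn.initializer, as the metric.py hunk above does with ConstantInitializer.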