Unverified commit 6edc7bba authored by zqw_1997, committed by GitHub

remove fluid.initializer.UniformInitializer, ConstantInitializer, NormalInitializer, TruncatedNormalInitializer, XavierInitializer, BilinearInitializer, MSRAInitializer, NumpyArrayInitializer and calculate_gain. (#49498)

* move UniformInitializer and ConstantInitializer

* more modifications

* circular import resolved

* another circular import resolved?

* more circular import fixes (2)

* circular import fix (3)

* change import paddle in metric.py

* BuildStrategy import from fluid

* modify the framework import path in common.py

* change rnn.py import, from static to original framework

* change import static in the nn folder

* default_main_program should import from common_ops_import

* add import paddle in param_attr.py

* use core rather than the paddle module for VarDesc

* another old uniform

* fix a mistake that used Uniform instead of UniformInitializer

* modify UniformInitializer doc

* move fluid.NormalInitializer to nn.initializer.NormalInitializer

* remove import of Normal in fluid.layers.nn.py

* remove more import of old Normal

* remove more import of old Normal

* modify sample code and fix test imports

* is_listen_failed: the passed arg should be the log file

* problem solved

* a mistake solved

* comments resolved and remove paddle.fluid.initializer.TruncatedNormalInitializer

* remove paddle.fluid.initializer.XavierInitializer and paddle.fluid.initializer.MSRAInitializer

* remove paddle.fluid.initializer.BilinearInitializer NumpyArrayInitializer and set_global_initializer

* change fluid to static

* change static to fluid to avoid circular import in distributed_strategy.py

* fix example code and test_initializer

* ValueType

* sample code fix

* change set_global_initializer back to fluid

* put paddle.static.BuildStrategy.ReduceStrategy into the function to avoid circular import

* remove calculate_gain, delete BilinearInitializer and revert set_global_initializer

* reduce the number of places using UniformInitializer, ConstantInitializer, NormalInitializer, TruncatedNormalInitializer, XavierInitializer, MSRAInitializer, NumpyArrayInitializer to as few as possible

* fix incompatible argument

* fix more incompatible args

* fix test_prelu_op_xpu.py Constant

* fix inaccurate doc

* more doc fixes: default value
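
For reference, a minimal before/after migration sketch assembled from the replacements in this diff (a hedged illustration, assuming a paddle build with the post-#49498 API; the array and layer below are illustrative, not from the diff):

    import numpy as np
    import paddle

    arr = np.random.normal(0, 0.02, size=(8, 8)).astype('float32')

    # Removed fluid spelling                     ->  paddle.nn.initializer spelling
    # fluid.initializer.Constant(0.01)           ->  paddle.nn.initializer.Constant(0.01)
    # fluid.initializer.Uniform(low=-1, high=1)  ->  paddle.nn.initializer.Uniform(low=-1.0, high=1.0)
    # fluid.initializer.NormalInitializer(loc=0.0, scale=0.02)
    #                                            ->  paddle.nn.initializer.Normal(mean=0.0, std=0.02)
    # fluid.initializer.TruncatedNormal(scale=s) ->  paddle.nn.initializer.TruncatedNormal(std=s)
    # fluid.initializer.Xavier(uniform=False)    ->  paddle.nn.initializer.XavierNormal()
    # fluid.initializer.MSRA()                   ->  paddle.nn.initializer.KaimingUniform()
    # fluid.initializer.NumpyArrayInitializer(a) ->  paddle.nn.initializer.Assign(a)

    # Typical call site after migration:
    w_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(arr))
    linear = paddle.nn.Linear(8, 8, weight_attr=w_attr)
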
Parent 2b848aef
......@@ -32,7 +32,6 @@ from paddle.fluid.framework import ( # noqa: F401
dygraph_only,
in_dygraph_mode,
)
from paddle.fluid.initializer import Constant # noqa: F401
from paddle.fluid.layer_helper import LayerHelper # noqa: F401
from paddle.fluid.layers import fill_constant, utils # noqa: F401
from paddle.fluid.layers.layer_function_generator import ( # noqa: F401
......
......@@ -104,7 +104,6 @@ class DistributedJobInfo:
self.job_info.strategy = dist_strategy
ReduceStrategyFluid = paddle.static.BuildStrategy.ReduceStrategy
ReduceStrategyFleet = int
......@@ -261,7 +260,7 @@ class DistributedStrategy:
for f in fields:
value = getattr(self.strategy.build_strategy, f.name)
if f.name == 'reduce_strategy':
value = ReduceStrategyFluid(value)
value = paddle.static.BuildStrategy.ReduceStrategy(value)
setattr(build_strategy, f.name, value)
return build_strategy
......
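
The distributed_strategy.py hunk above breaks a circular import by resolving paddle.static.BuildStrategy.ReduceStrategy when the method runs instead of binding it at module import time. A minimal sketch of the pattern (function name illustrative):

    # Deferred attribute lookup: paddle.static is only touched at call
    # time, not while this module is being imported.
    def _to_reduce_strategy(raw_value):
        import paddle
        return paddle.static.BuildStrategy.ReduceStrategy(raw_value)
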
......@@ -18,11 +18,11 @@ import numpy as np
import paddle
from paddle import _legacy_C_ops
from paddle.common_ops_import import Variable
from paddle.fluid import core
from paddle.fluid.data_feeder import check_variable_and_dtype
from paddle.fluid.framework import in_dygraph_mode
from paddle.framework import LayerHelper
from paddle.static import Variable
__all__ = []
......
......@@ -171,7 +171,7 @@ class DGCMomentumOptimizer(Optimizer):
if is_new_var:
helper.set_variable_initializer(
counter,
initializer=paddle.fluid.initializer.Constant(
initializer=paddle.nn.initializer.ConstantInitializer(
value=float(begin - 1), force_cpu=True
),
)
......@@ -194,7 +194,7 @@ class DGCMomentumOptimizer(Optimizer):
if is_new_var:
helper.set_variable_initializer(
counter,
initializer=paddle.fluid.initializer.Constant(
initializer=paddle.nn.initializer.ConstantInitializer(
value=float(value), force_cpu=True
),
)
......
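
Note on the DGC hunks: where force_cpu is needed, the call sites use paddle.nn.initializer.ConstantInitializer rather than the public Constant alias, since (per this diff) only the ConstantInitializer class exposes that argument:

    # Mirrors the hunk above; force_cpu keeps the counter in CPU memory.
    init = paddle.nn.initializer.ConstantInitializer(value=0.0, force_cpu=True)
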
......@@ -18,7 +18,7 @@ import math
import numpy as np
import paddle
from paddle.static import Variable
from paddle.common_ops_import import Variable
__all__ = []
......
......@@ -586,7 +586,6 @@ class IpuDynamicPatcher:
"""
from ..fluid.dygraph.base import switch_to_static_graph
from ..fluid import backward
from ..fluid.initializer import Constant
from ..fluid.framework import device_guard
import paddle
......@@ -645,7 +644,10 @@ class IpuDynamicPatcher:
device = optimizer._get_device_for_param(param_name)
with device_guard(device):
optimizer.helper.set_variable_initializer(
var, initializer=Constant(value=0.0)
var,
initializer=paddle.nn.initializer.Constant(
value=0.0
),
)
param_or_lr_tensor = scope.find_var(
var_tmp.name
......
......@@ -17,7 +17,6 @@ Contrib layers just related to metric.
import warnings
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.initializer import Normal, Constant
from paddle.fluid.framework import Variable
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.layers import tensor
......@@ -147,7 +146,10 @@ def ctr_metric_bundle(input, label, ins_tag_weight=None):
local_ins_num,
]:
helper.set_variable_initializer(
var, Constant(value=0.0, force_cpu=True)
var,
paddle.nn.initializer.ConstantInitializer(
value=0.0, force_cpu=True
),
)
helper.append_op(
......
......@@ -24,7 +24,6 @@ import paddle
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.layers import utils
from ... import unique_name
from paddle.fluid.initializer import Normal, Constant, NumpyArrayInitializer
from paddle.fluid.data_feeder import (
check_variable_and_dtype,
check_type,
......@@ -896,8 +895,10 @@ def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'):
Examples:
.. code-block:: python
import paddle
import paddle.fluid as fluid
import numpy as np
paddle.enable_static()
x = fluid.data(name="x", shape=[None, 1], dtype="int32", lod_level=1)
tree_info = [[0,0,0,1,2],
[0,1,0,3,4],[0,1,0,5,6],
......@@ -908,7 +909,7 @@ def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'):
child_nums = 2
child, leaf_mask = fluid.contrib.layers.tdm_child(x, node_nums, child_nums,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(
initializer=paddle.nn.initializer.Assign(
tree_info_np)))
place = fluid.CPUPlace()
exe = fluid.Executor(place)
......@@ -925,7 +926,7 @@ def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'):
attr=helper.param_attr,
shape=[node_nums, 3 + child_nums],
dtype=dtype,
default_initializer=Constant(0),
default_initializer=paddle.nn.initializer.Constant(0),
)
tree_info.stop_gradient = True
......@@ -1003,8 +1004,10 @@ def tdm_sampler(
Examples:
.. code-block:: python
import paddle
import paddle.fluid as fluid
import numpy as np
paddle.enable_static()
x = fluid.data(name="x", shape=[None, 1], dtype="int32", lod_level=1)
travel_list = [[1, 3], [1, 4], [2, 5], [2, 6]] # leaf node's travel path, shape(leaf_node_num, layer_num)
layer_list_flat = [[1], [2], [3], [4], [5], [6]] # shape(node_nums, 1)
......@@ -1022,10 +1025,10 @@ def tdm_sampler(
layer_node_num_list,
leaf_node_num,
tree_travel_attr=fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(
initializer=paddle.nn.initializer.Assign(
travel_array)),
tree_layer_attr=fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(
initializer=paddle.nn.initializer.Assign(
layer_array)),
output_positive=True,
output_list=True,
......@@ -1089,7 +1092,7 @@ def tdm_sampler(
attr=tree_travel_attr,
shape=travel_shape,
dtype=tree_dtype,
default_initializer=Constant(0),
default_initializer=paddle.nn.initializer.Constant(0),
)
layer_shape = [node_nums, 1]
......@@ -1097,7 +1100,7 @@ def tdm_sampler(
attr=tree_layer_attr,
shape=layer_shape,
dtype=tree_dtype,
default_initializer=Constant(0),
default_initializer=paddle.nn.initializer.Constant(0),
)
out = helper.create_variable_for_type_inference(dtype=dtype)
......@@ -1640,7 +1643,7 @@ def fused_bn_add_act(
attr=helper.param_attr,
shape=param_shape,
dtype=bn_param_dtype,
default_initializer=Constant(1.0),
default_initializer=paddle.nn.initializer.Constant(1.0),
)
bias = helper.create_parameter(
attr=helper.bias_attr,
......@@ -1650,7 +1653,9 @@ def fused_bn_add_act(
)
mean = helper.create_parameter(
attr=ParamAttr(
name=moving_mean_name, initializer=Constant(0.0), trainable=False
name=moving_mean_name,
initializer=paddle.nn.initializer.Constant(0.0),
trainable=False,
),
shape=param_shape,
dtype=bn_param_dtype,
......@@ -1659,7 +1664,7 @@ def fused_bn_add_act(
variance = helper.create_parameter(
attr=ParamAttr(
name=moving_variance_name,
initializer=Constant(1.0),
initializer=paddle.nn.initializer.Constant(1.0),
trainable=False,
),
shape=param_shape,
......@@ -1723,13 +1728,16 @@ def pow2_decay_with_linear_warmup(
helper = LayerHelper("pow2_decay_with_linear_warmup", **locals())
lr = helper.create_global_variable(persistable=True, dtype=dtype, shape=[1])
helper.set_variable_initializer(
lr, Constant(value=float(base_lr) / warmup_steps)
lr,
paddle.nn.initializer.Constant(value=float(base_lr) / warmup_steps),
)
step = helper.create_global_variable(
persistable=True, dtype='int64', shape=[1]
)
helper.set_variable_initializer(step, Constant(value=0))
helper.set_variable_initializer(
step, paddle.nn.initializer.Constant(value=0)
)
assert (
warmup_steps <= total_steps
), "warmup_steps cannot be larger than total_steps"
......
......@@ -20,7 +20,6 @@ from . import layers
from .framework import Program, Variable, program_guard
from . import unique_name
from .layer_helper import LayerHelper
from .initializer import Constant
def _clone_var_(block, var):
......
......@@ -109,7 +109,7 @@ def model():
size=[dnn_input_dim, dnn_layer_dims[0]],
param_attr=fluid.ParamAttr(
name="deep_embedding",
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
),
is_sparse=True,
)
......@@ -121,7 +121,7 @@ def model():
size=dim,
activation="relu",
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
name='dnn-fc-%d' % i,
)
......@@ -134,7 +134,7 @@ def model():
size=[lr_input_dim, 1],
param_attr=fluid.ParamAttr(
name="wide_embedding",
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
),
is_sparse=True,
)
......
This diff is collapsed.
......@@ -22,7 +22,6 @@ from .framework import (
cpu_places,
)
from .param_attr import ParamAttr
from .initializer import Constant
from . import layers
from . import backward
from .dygraph import Layer
......@@ -42,7 +41,9 @@ class SimpleLayer(Layer):
self._linear1 = paddle.nn.Linear(
input_size,
3,
weight_attr=ParamAttr(initializer=Constant(value=0.1)),
weight_attr=ParamAttr(
initializer=paddle.nn.initializer.Constant(value=0.1)
),
)
def forward(self, inputs):
......
......@@ -13,7 +13,7 @@
# limitations under the License.
import copy
import paddle
from .framework import (
Parameter,
dtype_is_floating,
......@@ -22,7 +22,6 @@ from .framework import (
_global_flags,
)
from . import unique_name
from paddle.fluid.initializer import Constant, Xavier
from .param_attr import ParamAttr
from . import core
......@@ -178,10 +177,10 @@ class LayerHelper(LayerHelperBase):
# TODO (jiabin): should we remove this since it has never be used
def _get_default_initializer(self, dtype):
if dtype is None or dtype_is_floating(dtype) is True:
return Xavier()
return paddle.nn.initializer.XavierUniform()
else:
# For integer and boolean types, initialize with all zeros
return Constant()
return paddle.nn.initializer.Constant()
# TODO (jiabin): reconstruct this in LayerObjHelper and avoid dependency of kwargs
def is_instance(self, param_name, cls):
......
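
The renamed defaults in _get_default_initializer map onto the public classes as follows (a hedged sketch; the dtype dispatch itself is omitted):

    import paddle

    # Floating-point parameters default to Xavier/Glorot uniform;
    # integer and boolean parameters default to all zeros.
    float_default = paddle.nn.initializer.XavierUniform()
    int_default = paddle.nn.initializer.Constant()  # value defaults to 0.0
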
......@@ -92,7 +92,7 @@ class ListenAndServ:
shape=[32, 32],
dtype='float32',
name="X")
fluid.initializer.Constant(value=1.0)(x, main.global_block())
paddle.nn.initializer.Constant(value=1.0)(x, main.global_block())
paddle.scale(x=x, scale=10.0, out=out_var)
exe = fluid.Executor(place)
......
......@@ -22,7 +22,6 @@ import numpy as np
import paddle
from ..layer_helper import LayerHelper
from ..initializer import Normal, Constant
from ..framework import (
Variable,
OpProtoHolder,
......@@ -240,7 +239,7 @@ def embedding(
w_param_attrs = fluid.ParamAttr(
name="emb_weight",
learning_rate=0.5,
initializer=fluid.initializer.NumpyArrayInitializer(weight_data),
initializer=paddle.nn.initializer.Assign(weight_data),
trainable=True)
emb_2 = fluid.layers.embedding(input=data, size=(128, 100), param_attr=w_param_attrs, dtype='float32')
"""
......@@ -673,7 +672,10 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1):
)
if is_new_var:
helper.set_variable_initializer(
counter, initializer=Constant(value=begin - 1, force_cpu=True)
counter,
initializer=paddle.nn.initializer.ConstantInitializer(
value=begin - 1, force_cpu=True
),
)
helper.main_program.global_block()._prepend_op(
type='increment',
......
......@@ -19,7 +19,6 @@ import numpy as np
import copy
from .layer_helper import LayerHelper
from .initializer import Constant
from . import unique_name
from .framework import Program, Variable, program_guard
from . import layers
......
......@@ -39,7 +39,6 @@ from .backward import (
_get_no_grad_set_name,
)
from .framework import program_guard
from .initializer import Constant
from .layer_helper import LayerHelper
from .dygraph import base as imperative_base
from .dygraph import no_grad
......@@ -397,7 +396,8 @@ class Optimizer:
lr_value = float(self._learning_rate())
self.helper.set_variable_initializer(
lr_var, initializer=Constant(value=lr_value)
lr_var,
initializer=paddle.nn.initializer.Constant(value=lr_value),
)
return
......@@ -713,7 +713,10 @@ class Optimizer:
device = self._get_device_for_param(param.name)
with device_guard(device):
self.helper.set_variable_initializer(
var, initializer=Constant(value=float(fill_value))
var,
initializer=paddle.nn.initializer.Constant(
value=float(fill_value)
),
)
if in_dygraph_mode():
......@@ -774,7 +777,10 @@ class Optimizer:
device = 'cpu'
with device_guard(device):
self.helper.set_variable_initializer(
var, initializer=Constant(value=float(fill_value))
var,
initializer=paddle.nn.initializer.Constant(
value=float(fill_value)
),
)
if in_dygraph_mode():
......@@ -1225,10 +1231,12 @@ class Optimizer:
# NOTE(zhiqiu): the initializer should be set after coalesce_tensor op,
# so the shape of flatten_param and flatten_grad will be inferred.
self.helper.set_variable_initializer(
flatten_param, initializer=Constant(0.0)
flatten_param,
initializer=paddle.nn.initializer.Constant(0.0),
)
self.helper.set_variable_initializer(
flatten_grad, initializer=Constant(0.0)
flatten_grad,
initializer=paddle.nn.initializer.Constant(0.0),
)
return [(flatten_param, flatten_grad)]
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .initializer import Initializer, Xavier, Constant
import paddle
from .regularizer import WeightDecayRegularizer
from paddle.fluid.data_feeder import check_type
......@@ -88,7 +88,10 @@ class ParamAttr:
check_type(do_model_average, "do_model_average", (bool), "ParamAttr")
check_type(need_clip, "need_clip", (bool), "ParamAttr")
check_type(
initializer, "initializer", (Initializer, type(None)), "ParamAttr"
initializer,
"initializer",
(paddle.nn.initializer.Initializer, type(None)),
"ParamAttr",
)
check_type(
regularizer,
......@@ -139,7 +142,7 @@ class ParamAttr:
Returns:
None.
"""
self._set_default_initializer(Xavier())
self._set_default_initializer(paddle.nn.initializer.XavierUniform())
def _set_default_bias_initializer(self):
"""
......@@ -151,7 +154,7 @@ class ParamAttr:
Returns:
None.
"""
self._set_default_initializer(Constant(0.0))
self._set_default_initializer(paddle.nn.initializer.Constant(0.0))
@staticmethod
def _to_attr(arg):
......@@ -177,7 +180,7 @@ class ParamAttr:
return arg
elif isinstance(arg, str):
return ParamAttr(name=arg)
elif isinstance(arg, Initializer):
elif isinstance(arg, paddle.nn.initializer.Initializer):
return ParamAttr(initializer=arg)
elif isinstance(arg, WeightDecayRegularizer):
return ParamAttr(regularizer=arg)
......
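
With the check_type change above, ParamAttr now validates against paddle.nn.initializer.Initializer, so any initializer from that package can be passed directly (a usage sketch; layer shapes are illustrative):

    import paddle

    # Accepted by ParamAttr because XavierUniform subclasses
    # paddle.nn.initializer.Initializer, matching the isinstance
    # branch in _to_attr above.
    attr = paddle.ParamAttr(initializer=paddle.nn.initializer.XavierUniform())
    linear = paddle.nn.Linear(4, 4, weight_attr=attr)
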
......@@ -36,7 +36,6 @@ from paddle.distributed.auto_parallel.utils import (
save_distributed_checkpoint,
)
from paddle.distributed.fleet import auto
from paddle.fluid.initializer import NumpyArrayInitializer
paddle.enable_static()
_global_parallel_strategy = None
......@@ -55,8 +54,12 @@ class MLPLayer(nn.Layer):
np.random.seed(2021)
arr0 = np.random.normal(0, 0.02, size=(d_model, dim_feedforward))
arr1 = np.random.normal(0, 0.02, size=(d_model, dim_feedforward))
weight_attr0 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr0))
weight_attr1 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr1))
weight_attr0 = paddle.ParamAttr(
initializer=paddle.nn.initializer.Assign(arr0)
)
weight_attr1 = paddle.ParamAttr(
initializer=paddle.nn.initializer.Assign(arr1)
)
bias_attr = None
self.linear0 = nn.Linear(
d_model, dim_feedforward, weight_attr0, bias_attr=bias_attr
......
......@@ -30,7 +30,6 @@ from paddle.distributed.auto_parallel.utils import (
save_distributed_checkpoint,
)
from paddle.distributed.fleet import auto
from paddle.fluid.initializer import NumpyArrayInitializer
paddle.enable_static()
_global_parallel_strategy = None
......@@ -48,7 +47,9 @@ class MLPLayer(nn.Layer):
dim_feedforward = intermediate_size
np.random.seed(2021)
arr = np.random.normal(0, 0.02, size=(d_model, dim_feedforward))
weight_attr = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr))
weight_attr = paddle.ParamAttr(
initializer=paddle.nn.initializer.Assign(arr)
)
bias_attr = None
self.linear0 = nn.Linear(
......
......@@ -38,15 +38,11 @@ class TestColumnParallelLinearAPI(TestCollectiveAPIRunnerBase):
paddle.distributed.broadcast(data, src=0)
if rank == 0:
param_attr = paddle.fluid.ParamAttr(
initializer=paddle.fluid.initializer.NumpyArrayInitializer(
np_array[:, 0:8]
),
initializer=paddle.nn.initializer.Assign(np_array[:, 0:8]),
)
else:
param_attr = paddle.fluid.ParamAttr(
initializer=paddle.fluid.initializer.NumpyArrayInitializer(
np_array[:, 8:16]
),
initializer=paddle.nn.initializer.Assign(np_array[:, 8:16]),
)
linear_out = paddle.distributed.split(
......
......@@ -242,10 +242,10 @@ class PrePostProcessLayer(Layer):
self._layer_norm = paddle.nn.LayerNorm(
normalized_shape=d_model,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.0)
initializer=paddle.nn.initializer.Constant(1.0)
),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.0)
initializer=paddle.nn.initializer.Constant(0.0)
),
)
......@@ -513,7 +513,9 @@ class PrepareEncoderDecoderLayer(Layer):
sparse=is_sparse,
weight_attr=fluid.ParamAttr(
name=word_emb_param_name,
initializer=fluid.initializer.Normal(0.0, src_emb_dim**-0.5),
initializer=paddle.nn.initializer.Normal(
0.0, src_emb_dim**-0.5
),
),
)
......@@ -527,7 +529,7 @@ class PrepareEncoderDecoderLayer(Layer):
sparse=is_sparse,
weight_attr=fluid.ParamAttr(
name=pos_enc_param_name,
initializer=fluid.initializer.NumpyArrayInitializer(pos_inp),
initializer=paddle.nn.initializer.Assign(pos_inp),
trainable=False,
),
)
......
......@@ -39,7 +39,7 @@ def cnn_model(data):
pool_stride=2,
act="relu",
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
......@@ -50,7 +50,7 @@ def cnn_model(data):
pool_stride=2,
act="relu",
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
......@@ -65,7 +65,7 @@ def cnn_model(data):
size=SIZE,
activation="softmax",
weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
# To cover @RENAMED@GRADIENT
......@@ -74,7 +74,7 @@ def cnn_model(data):
size=SIZE,
activation="softmax",
weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
predict += predict2
......
......@@ -39,7 +39,7 @@ def cnn_model(data):
pool_stride=2,
act="relu",
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
......@@ -50,7 +50,7 @@ def cnn_model(data):
pool_stride=2,
act="relu",
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
......@@ -65,7 +65,7 @@ def cnn_model(data):
size=SIZE,
activation="softmax",
weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
# To cover @RENAMED@GRADIENT
......@@ -74,7 +74,7 @@ def cnn_model(data):
size=SIZE,
activation="softmax",
weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
predict += predict2
......
......@@ -39,7 +39,7 @@ def cnn_model(data):
pool_stride=2,
act="relu",
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
......@@ -50,7 +50,7 @@ def cnn_model(data):
pool_stride=2,
act="relu",
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
......@@ -64,7 +64,7 @@ def cnn_model(data):
size=SIZE,
activation="softmax",
weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
return predict
......
......@@ -33,11 +33,9 @@ OUT_SIZE = 2 * MODEL_PARALLEL_SIZE
def get_param_attr(weight, bias):
weight_attr = paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(weight)
)
bias_attr = paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(bias)
initializer=paddle.nn.initializer.Assign(weight)
)
bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(bias))
return weight_attr, bias_attr
......
......@@ -33,11 +33,9 @@ OUT_SIZE = 2 * MODEL_PARALLEL_SIZE
def get_param_attr(weight, bias):
weight_attr = paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(weight)
)
bias_attr = paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(bias)
initializer=paddle.nn.initializer.Assign(weight)
)
bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(bias))
return weight_attr, bias_attr
......@@ -65,7 +63,7 @@ def create_model(data, rank):
data,
size=OUT_SIZE,
weight_attr=paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(np_weight)
initializer=paddle.nn.initializer.Assign(np_weight)
),
bias_attr=bias_attr,
)
......
......@@ -44,9 +44,7 @@ def create_model(data, rank):
axis=0,
num_partitions=MODEL_PARALLEL_SIZE,
weight_attr=paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(
np_weight_part
)
initializer=paddle.nn.initializer.Assign(np_weight_part)
),
bias_attr=False,
)
......@@ -55,7 +53,7 @@ def create_model(data, rank):
data,
size=OUT_SIZE,
weight_attr=paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(np_weight)
initializer=paddle.nn.initializer.Assign(np_weight)
),
bias_attr=False,
)
......
......@@ -35,7 +35,7 @@ def weight_init(mp, shape, col=True, seed=1024):
else:
step = shape[0] // mp.nranks
_w = w[mp.rank * step : mp.rank * step + step, :]
return paddle.fluid.initializer.NumpyArrayInitializer(_w)
return paddle.nn.initializer.Assign(_w)
class Criterion(nn.Layer):
......
......@@ -35,7 +35,7 @@ def weight_init(mp, shape, col=True, seed=1024):
else:
step = shape[0] // mp.nranks
_w = w[mp.rank * step : mp.rank * step + step, :]
return paddle.fluid.initializer.NumpyArrayInitializer(_w)
return paddle.nn.initializer.Assign(_w)
class Criterion(nn.Layer):
......
......@@ -36,7 +36,7 @@ def weight_init(mp, shape, col=True, seed=1024):
else:
step = shape[0] // mp.nranks
_w = w[mp.rank * step : mp.rank * step + step, :]
return paddle.fluid.initializer.NumpyArrayInitializer(_w)
return paddle.nn.initializer.Assign(_w)
class Criterion(nn.Layer):
......
......@@ -42,13 +42,13 @@ class TestParallelEmbeddingAPI(TestCollectiveAPIRunnerBase):
per_part_size = size[0] // 2
if rank == 0:
param_attr = paddle.fluid.ParamAttr(
initializer=paddle.fluid.initializer.NumpyArrayInitializer(
initializer=paddle.nn.initializer.Assign(
np_array[0:per_part_size, :]
),
)
else:
param_attr = paddle.fluid.ParamAttr(
initializer=paddle.fluid.initializer.NumpyArrayInitializer(
initializer=paddle.nn.initializer.Assign(
np_array[per_part_size : size[0], :]
),
)
......
......@@ -39,13 +39,13 @@ class TestRowParallelLinearAPI(TestCollectiveAPIRunnerBase):
data = paddle.split(data, 2, axis=1)[rank]
if rank == 0:
param_attr = paddle.fluid.ParamAttr(
initializer=paddle.fluid.initializer.NumpyArrayInitializer(
initializer=paddle.nn.initializer.Assign(
np_array[0:500, :]
),
)
else:
param_attr = paddle.fluid.ParamAttr(
initializer=paddle.fluid.initializer.NumpyArrayInitializer(
initializer=paddle.nn.initializer.Assign(
np_array[500:1000, :]
),
)
......
......@@ -38,7 +38,7 @@ def cnn_model(data):
pool_stride=2,
act="relu",
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
......@@ -49,7 +49,7 @@ def cnn_model(data):
pool_stride=2,
act="relu",
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
......@@ -63,7 +63,7 @@ def cnn_model(data):
size=SIZE,
activation="softmax",
weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
return predict
......
......@@ -60,7 +60,7 @@ class TestDistCTR2x2(TestDistRunnerBase):
size=[dnn_input_dim, dnn_layer_dims[0]],
param_attr=fluid.ParamAttr(
name="deep_embedding",
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
),
is_sparse=IS_SPARSE,
)
......@@ -74,7 +74,7 @@ class TestDistCTR2x2(TestDistRunnerBase):
size=dim,
activation="relu",
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
name='dnn-fc-%d' % i,
)
......@@ -87,7 +87,7 @@ class TestDistCTR2x2(TestDistRunnerBase):
size=[lr_input_dim, 1],
param_attr=fluid.ParamAttr(
name="wide_embedding",
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
),
is_sparse=IS_SPARSE,
)
......
......@@ -107,7 +107,7 @@ class TestDistCTR2x2(FleetDistRunnerBase):
size=[dnn_input_dim, dnn_layer_dims[0]],
param_attr=fluid.ParamAttr(
name="deep_embedding",
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
),
is_sparse=True,
padding_idx=0,
......@@ -122,7 +122,7 @@ class TestDistCTR2x2(FleetDistRunnerBase):
size=dim,
activation="relu",
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
name='dnn-fc-%d' % i,
)
......@@ -135,7 +135,7 @@ class TestDistCTR2x2(FleetDistRunnerBase):
size=[lr_input_dim, 1],
param_attr=fluid.ParamAttr(
name="wide_embedding",
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
),
is_sparse=True,
padding_idx=0,
......
......@@ -78,7 +78,7 @@ class TestHeterPipelinePsCTR2x2(FleetDistHeterRunnerBase):
size=[dnn_input_dim, dnn_layer_dims[0]],
param_attr=fluid.ParamAttr(
name="deep_embedding",
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
),
is_sparse=True,
)
......@@ -94,7 +94,7 @@ class TestHeterPipelinePsCTR2x2(FleetDistHeterRunnerBase):
size=[lr_input_dim, 1],
param_attr=fluid.ParamAttr(
name="wide_embedding",
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
),
is_sparse=True,
)
......@@ -109,7 +109,7 @@ class TestHeterPipelinePsCTR2x2(FleetDistHeterRunnerBase):
size=dim,
activation="relu",
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
name='dnn-fc-%d' % i,
)
......
......@@ -40,7 +40,7 @@ def cnn_model(data):
pool_stride=2,
act="relu",
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
......@@ -51,7 +51,7 @@ def cnn_model(data):
pool_stride=2,
act="relu",
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
......@@ -65,7 +65,7 @@ def cnn_model(data):
size=SIZE,
activation="softmax",
weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
return predict
......
......@@ -40,7 +40,7 @@ def cnn_model(data):
pool_stride=2,
act="relu",
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
......@@ -51,7 +51,7 @@ def cnn_model(data):
pool_stride=2,
act="relu",
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
......@@ -65,7 +65,7 @@ def cnn_model(data):
size=SIZE,
activation="softmax",
weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
return predict
......
......@@ -124,7 +124,8 @@ def train_network(
is_distributed=is_distributed,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), name="__emb__"
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
),
is_sparse=is_sparse,
)
......@@ -137,7 +138,7 @@ def train_network(
x=q_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__q_fc__",
learning_rate=base_lr,
),
......@@ -149,7 +150,7 @@ def train_network(
is_distributed=is_distributed,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
learning_rate=emb_lr,
),
......@@ -164,7 +165,8 @@ def train_network(
x=pt_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), name="__fc__"
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__",
),
bias_attr=fluid.ParamAttr(name="__fc_b__"),
)
......@@ -175,7 +177,8 @@ def train_network(
is_distributed=is_distributed,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), name="__emb__"
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
),
is_sparse=is_sparse,
)
......@@ -188,7 +191,8 @@ def train_network(
x=nt_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), name="__fc__"
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__",
),
bias_attr=fluid.ParamAttr(name="__fc_b__"),
)
......
......@@ -86,11 +86,11 @@ class TestDistCTR2x2(FleetDistRunnerBase):
inference = bool(int(os.getenv("INFERENCE", "0")))
if initializer == 0:
init = fluid.initializer.Constant(value=0.01)
init = paddle.nn.initializer.Constant(value=0.01)
elif initializer == 1:
init = fluid.initializer.Uniform()
init = paddle.nn.initializer.Uniform()
elif initializer == 2:
init = fluid.initializer.Normal()
init = paddle.nn.initializer.Normal()
else:
raise ValueError("error initializer code: {}".format(initializer))
......@@ -113,7 +113,7 @@ class TestDistCTR2x2(FleetDistRunnerBase):
size=dim,
activation="relu",
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
name='dnn-fc-%d' % i,
)
......@@ -127,7 +127,7 @@ class TestDistCTR2x2(FleetDistRunnerBase):
entry=entry,
param_attr=fluid.ParamAttr(
name="wide_embedding",
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
),
)
......
......@@ -39,7 +39,7 @@ def cnn_model(data):
pool_stride=2,
act="relu",
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
......@@ -50,7 +50,7 @@ def cnn_model(data):
pool_stride=2,
act="relu",
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
......@@ -64,7 +64,7 @@ def cnn_model(data):
size=SIZE,
activation="softmax",
weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
return predict
......
......@@ -121,7 +121,7 @@ class SE_ResNeXt:
size=class_dim,
activation='softmax',
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.05)
initializer=paddle.nn.initializer.Constant(value=0.05)
),
)
return out
......@@ -174,7 +174,7 @@ class SE_ResNeXt:
act=None,
# avoid pserver CPU init differs from GPU
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.05)
initializer=paddle.nn.initializer.Constant(value=0.05)
),
bias_attr=False,
)
......@@ -187,7 +187,7 @@ class SE_ResNeXt:
x=pool,
size=num_channels // reduction_ratio,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.05)
initializer=paddle.nn.initializer.Constant(value=0.05)
),
activation='relu',
)
......@@ -196,7 +196,7 @@ class SE_ResNeXt:
x=squeeze,
size=num_channels,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.05)
initializer=paddle.nn.initializer.Constant(value=0.05)
),
activation='sigmoid',
)
......
......@@ -59,7 +59,7 @@ def conv_net(
size=[dict_dim, emb_dim],
is_sparse=False,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
......@@ -70,7 +70,7 @@ def conv_net(
act="tanh",
pool_type="max",
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
......@@ -78,7 +78,7 @@ def conv_net(
x=[conv_3],
size=fc0_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
......@@ -87,7 +87,7 @@ def conv_net(
size=class_dim,
activation="softmax",
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
)
......
......@@ -29,7 +29,9 @@ import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle.nn.functional as F
const_para_attr = fluid.ParamAttr(initializer=fluid.initializer.Constant(0.001))
const_para_attr = fluid.ParamAttr(
initializer=paddle.nn.initializer.Constant(0.001)
)
const_bias_attr = const_para_attr
# Fix seed for test
......@@ -1253,8 +1255,8 @@ def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.0):
out = layers.layer_norm(
out,
begin_norm_axis=len(out.shape) - 1,
param_attr=fluid.initializer.Constant(1.0),
bias_attr=fluid.initializer.Constant(0.0),
param_attr=paddle.nn.initializer.Constant(1.0),
bias_attr=paddle.nn.initializer.Constant(0.0),
)
elif cmd == "d": # add dropout
if dropout_rate:
......@@ -1292,7 +1294,7 @@ def prepare_encoder(
size=[src_vocab_size, src_emb_dim],
param_attr=fluid.ParamAttr(
name=word_emb_param_name,
initializer=fluid.initializer.ConstantInitializer(0.001),
initializer=paddle.nn.initializer.Constant(0.001),
),
)
else:
......@@ -1301,7 +1303,9 @@ def prepare_encoder(
size=[src_vocab_size, src_emb_dim],
param_attr=fluid.ParamAttr(
name=word_emb_param_name,
initializer=fluid.initializer.Normal(0.0, src_emb_dim**-0.5),
initializer=paddle.nn.initializer.Normal(
0.0, src_emb_dim**-0.5
),
),
)
......@@ -1312,7 +1316,7 @@ def prepare_encoder(
param_attr=fluid.ParamAttr(
name=pos_enc_param_name,
trainable=False,
initializer=fluid.initializer.ConstantInitializer(0.001),
initializer=paddle.nn.initializer.Constant(0.001),
),
)
src_pos_enc.stop_gradient = True
......
......@@ -41,7 +41,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase):
is_sparse=IS_SPARSE,
param_attr=fluid.ParamAttr(
name='shared_w',
initializer=fluid.initializer.Constant(value=0.1),
initializer=paddle.nn.initializer.Constant(value=0.1),
),
)
embed_second = fluid.layers.embedding(
......@@ -51,7 +51,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase):
is_sparse=IS_SPARSE,
param_attr=fluid.ParamAttr(
name='shared_w',
initializer=fluid.initializer.Constant(value=0.1),
initializer=paddle.nn.initializer.Constant(value=0.1),
),
)
embed_third = fluid.layers.embedding(
......@@ -61,7 +61,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase):
is_sparse=IS_SPARSE,
param_attr=fluid.ParamAttr(
name='shared_w',
initializer=fluid.initializer.Constant(value=0.1),
initializer=paddle.nn.initializer.Constant(value=0.1),
),
)
embed_forth = fluid.layers.embedding(
......@@ -71,7 +71,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase):
is_sparse=IS_SPARSE,
param_attr=fluid.ParamAttr(
name='shared_w',
initializer=fluid.initializer.Constant(value=0.1),
initializer=paddle.nn.initializer.Constant(value=0.1),
),
)
......@@ -84,7 +84,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase):
size=HIDDEN_SIZE,
activation='sigmoid',
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)
initializer=paddle.nn.initializer.Constant(value=0.1)
),
)
predict_word = paddle.static.nn.fc(
......@@ -92,7 +92,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase):
size=dict_size,
activation='softmax',
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)
initializer=paddle.nn.initializer.Constant(value=0.1)
),
)
cost = paddle.nn.functional.cross_entropy(
......
......@@ -26,7 +26,6 @@ import paddle.nn.functional as F
import paddle.static as static
import paddle.utils as utils
from paddle.distributed.fleet import auto
from paddle.fluid.initializer import NumpyArrayInitializer
logging.getLogger().setLevel(logging.INFO)
paddle.enable_static()
......@@ -42,8 +41,12 @@ class MLPLayer(nn.Layer):
np.random.seed(2021)
arr0 = np.random.normal(0, 0.02, size=(d_model, dim_feedforward))
arr1 = np.random.normal(0, 0.02, size=(dim_feedforward, d_model))
weight_attr0 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr0))
weight_attr1 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr1))
weight_attr0 = paddle.ParamAttr(
initializer=paddle.nn.initializer.Assign(arr0)
)
weight_attr1 = paddle.ParamAttr(
initializer=paddle.nn.initializer.Assign(arr1)
)
bias_attr = None
self.linear0 = nn.Linear(
d_model, dim_feedforward, weight_attr0, bias_attr=bias_attr
......
......@@ -203,8 +203,8 @@ class BertModelLayer(Layer):
self._sent_emb_name = "sent_embedding"
self._dtype = "float16" if use_fp16 else "float32"
self._param_initializer = fluid.initializer.TruncatedNormal(
scale=config['initializer_range']
self._param_initializer = paddle.nn.initializer.TruncatedNormal(
std=config['initializer_range']
)
paddle.set_default_dtype(self._dtype)
self._src_emb = paddle.nn.Embedding(
......@@ -317,8 +317,8 @@ class PretrainModelLayer(Layer):
self._prepostprocess_dropout = config['hidden_dropout_prob']
self._word_emb_name = "word_embedding"
self._param_initializer = fluid.initializer.TruncatedNormal(
scale=config['initializer_range']
self._param_initializer = paddle.nn.initializer.TruncatedNormal(
std=config['initializer_range']
)
self._weight_sharing = weight_sharing
self.use_fp16 = use_fp16
......@@ -343,7 +343,7 @@ class PretrainModelLayer(Layer):
self.mask_lm_out_bias_attr = fluid.ParamAttr(
name="mask_lm_out_fc.b_0",
initializer=fluid.initializer.Constant(value=0.0),
initializer=paddle.nn.initializer.Constant(value=0.0),
)
if not self._weight_sharing:
......
......@@ -41,7 +41,7 @@ class ConvBNLayer(fluid.dygraph.Layer):
padding=padding,
groups=groups,
weight_attr=ParamAttr(
initializer=fluid.initializer.Normal(0.0, 0.02)
initializer=paddle.nn.initializer.Normal(0.0, 0.02)
),
bias_attr=False,
)
......@@ -49,11 +49,11 @@ class ConvBNLayer(fluid.dygraph.Layer):
num_channels=ch_out,
is_test=is_test,
param_attr=ParamAttr(
initializer=fluid.initializer.Normal(0.0, 0.02),
initializer=paddle.nn.initializer.Normal(0.0, 0.02),
regularizer=L2Decay(0.0),
),
bias_attr=ParamAttr(
initializer=fluid.initializer.Constant(0.0),
initializer=paddle.nn.initializer.Constant(0.0),
regularizer=L2Decay(0.0),
),
)
......
......@@ -26,10 +26,8 @@ from paddle.nn import Embedding
INF = 1.0 * 1e5
alpha = 0.6
uniform_initializer = lambda x: fluid.initializer.UniformInitializer(
low=-x, high=x
)
zero_constant = fluid.initializer.Constant(0.0)
uniform_initializer = lambda x: paddle.nn.initializer.Uniform(low=-x, high=x)
zero_constant = paddle.nn.initializer.Constant(0.0)
class BasicLSTMUnit(Layer):
......
......@@ -17,9 +17,9 @@ from functools import reduce
import paddle
import paddle.fluid as fluid
import paddle.fluid.param_attr as attr
from paddle.common_ops_import import Variable
from paddle.fluid.dygraph import Layer
from paddle.jit.api import to_static
from paddle.static import Variable
class EmbeddingLayer:
......@@ -48,7 +48,8 @@ class EmbeddingLayer:
sparse=True,
padding_idx=self.padding_idx,
weight_attr=attr.ParamAttr(
name=self.name, initializer=fluid.initializer.Xavier()
name=self.name,
initializer=paddle.nn.initializer.XavierUniform(),
),
)
......
......@@ -15,7 +15,7 @@
from functools import reduce
import paddle
from paddle.static import Variable
from paddle.common_ops_import import Variable
class EmbeddingLayer:
......
......@@ -118,10 +118,10 @@ def dyfunc_BilinearTensorProduct(layer1, layer2):
4,
1000,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.99)
initializer=paddle.nn.initializer.Constant(value=0.99)
),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.5)
initializer=paddle.nn.initializer.Constant(value=0.5)
),
)
......@@ -138,10 +138,10 @@ def dyfunc_Conv2D(input):
out_channels=2,
kernel_size=3,
weight_attr=paddle.ParamAttr(
initializer=fluid.initializer.Constant(value=0.99)
initializer=paddle.nn.initializer.Constant(value=0.99)
),
bias_attr=paddle.ParamAttr(
initializer=fluid.initializer.Constant(value=0.5)
initializer=paddle.nn.initializer.Constant(value=0.5)
),
)
res = conv2d(input)
......@@ -170,10 +170,10 @@ def dyfunc_Conv2DTranspose(input):
12,
12,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.99)
initializer=paddle.nn.initializer.Constant(value=0.99)
),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.5)
initializer=paddle.nn.initializer.Constant(value=0.5)
),
)
ret = conv2dTranspose(input)
......@@ -222,7 +222,7 @@ def dyfunc_Pool2D(input):
def dyfunc_Prelu(input):
prelu0 = paddle.nn.PReLU(
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.0)
initializer=paddle.nn.initializer.Constant(1.0)
),
)
res = prelu0(input)
......
......@@ -115,11 +115,11 @@ class Conv1D(fluid.dygraph.Layer):
k = 1.0 / math.sqrt(fan_in)
param_attr = ParamAttr(
name=prefix + "_w",
initializer=fluid.initializer.Uniform(low=-k, high=k),
initializer=paddle.nn.initializer.Uniform(low=-k, high=k),
)
bias_attr = ParamAttr(
name=prefix + "_b",
initializer=fluid.initializer.Uniform(low=-k, high=k),
initializer=paddle.nn.initializer.Uniform(low=-k, high=k),
)
self._conv2d = paddle.nn.Conv2D(
......
......@@ -125,10 +125,10 @@ class MyConvLayer(fluid.dygraph.Layer):
out_channels=2,
kernel_size=3,
weight_attr=paddle.ParamAttr(
initializer=fluid.initializer.Constant(value=0.99)
initializer=paddle.nn.initializer.Constant(value=0.99)
),
bias_attr=paddle.ParamAttr(
initializer=fluid.initializer.Constant(value=0.5)
initializer=paddle.nn.initializer.Constant(value=0.5)
),
)
......
......@@ -352,7 +352,7 @@ class conv2d(fluid.dygraph.Layer):
con_bias_attr = False
else:
con_bias_attr = fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.0)
initializer=paddle.nn.initializer.Constant(0.0)
)
self.conv = paddle.nn.Conv2D(
......@@ -362,9 +362,7 @@ class conv2d(fluid.dygraph.Layer):
stride=stride,
padding=padding,
weight_attr=paddle.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=stddev
)
initializer=paddle.nn.initializer.Normal(mean=0.0, std=stddev)
),
bias_attr=con_bias_attr,
)
......@@ -378,10 +376,10 @@ class conv2d(fluid.dygraph.Layer):
use_global_stats=True, # set True to use deterministic algorithm
num_channels=num_filters,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.NormalInitializer(1.0, 0.02)
initializer=paddle.nn.initializer.Normal(1.0, 0.02)
),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.0)
initializer=paddle.nn.initializer.Constant(0.0)
),
trainable_statistics=True,
)
......@@ -421,7 +419,7 @@ class DeConv2D(fluid.dygraph.Layer):
de_bias_attr = False
else:
de_bias_attr = fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.0)
initializer=paddle.nn.initializer.Constant(0.0)
)
self._deconv = paddle.nn.Conv2DTranspose(
......@@ -431,9 +429,7 @@ class DeConv2D(fluid.dygraph.Layer):
stride=stride,
padding=padding,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=stddev
)
initializer=paddle.nn.initializer.Normal(mean=0.0, std=stddev)
),
bias_attr=de_bias_attr,
)
......@@ -444,10 +440,10 @@ class DeConv2D(fluid.dygraph.Layer):
use_global_stats=True, # set True to use deterministic algorithm
num_channels=num_filters,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.NormalInitializer(1.0, 0.02)
initializer=paddle.nn.initializer.Normal(1.0, 0.02)
),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.0)
initializer=paddle.nn.initializer.Constant(0.0)
),
trainable_statistics=True,
)
......
......@@ -98,7 +98,7 @@ class BiGRU(fluid.dygraph.Layer):
in_features=input_dim,
out_features=grnn_hidden_dim * 3,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
initializer=paddle.nn.initializer.Uniform(
low=-init_bound, high=init_bound
),
regularizer=fluid.regularizer.L2DecayRegularizer(
......@@ -111,7 +111,7 @@ class BiGRU(fluid.dygraph.Layer):
size=grnn_hidden_dim,
h_0=h_0,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
initializer=paddle.nn.initializer.Uniform(
low=-init_bound, high=init_bound
),
regularizer=fluid.regularizer.L2DecayRegularizer(
......@@ -124,7 +124,7 @@ class BiGRU(fluid.dygraph.Layer):
in_features=input_dim,
out_features=grnn_hidden_dim * 3,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
initializer=paddle.nn.initializer.Uniform(
low=-init_bound, high=init_bound
),
regularizer=fluid.regularizer.L2DecayRegularizer(
......@@ -138,7 +138,7 @@ class BiGRU(fluid.dygraph.Layer):
is_reverse=True,
h_0=h_0,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
initializer=paddle.nn.initializer.Uniform(
low=-init_bound, high=init_bound
),
regularizer=fluid.regularizer.L2DecayRegularizer(
......@@ -375,7 +375,7 @@ class LexNet(fluid.dygraph.Layer):
weight_attr=fluid.ParamAttr(
learning_rate=self.emb_lr,
name="word_emb",
initializer=fluid.initializer.Uniform(
initializer=paddle.nn.initializer.Uniform(
low=-self.init_bound, high=self.init_bound
),
),
......@@ -415,7 +415,7 @@ class LexNet(fluid.dygraph.Layer):
in_features=self.grnn_hidden_dim * 2,
out_features=self.num_labels,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
initializer=paddle.nn.initializer.Uniform(
low=-self.init_bound, high=self.init_bound
),
regularizer=fluid.regularizer.L2DecayRegularizer(
......
......@@ -22,7 +22,6 @@ from predictor_utils import PredictorTools
import paddle
import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
from paddle.jit.api import to_static
from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
......@@ -61,7 +60,8 @@ class ConvBNLayer(fluid.dygraph.Layer):
padding=padding,
groups=num_groups,
weight_attr=ParamAttr(
initializer=MSRA(), name=self.full_name() + "_weights"
initializer=paddle.nn.initializer.KaimingUniform(),
name=self.full_name() + "_weights",
),
bias_attr=False,
)
......@@ -259,7 +259,8 @@ class MobileNetV1(fluid.dygraph.Layer):
int(1024 * scale),
class_dim,
weight_attr=ParamAttr(
initializer=MSRA(), name=self.full_name() + "fc7_weights"
initializer=paddle.nn.initializer.KaimingUniform(),
name=self.full_name() + "fc7_weights",
),
bias_attr=ParamAttr(name="fc7_offset"),
)
......
......@@ -49,26 +49,26 @@ class SimpleLSTMRNN(fluid.Layer):
for i in range(self._num_layers):
weight_1 = self.create_parameter(
attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer(
initializer=paddle.nn.initializer.Uniform(
low=-self._init_scale, high=self._init_scale
)
),
shape=[self._hidden_size * 2, self._hidden_size * 4],
dtype="float32",
default_initializer=fluid.initializer.UniformInitializer(
default_initializer=paddle.nn.initializer.Uniform(
low=-self._init_scale, high=self._init_scale
),
)
self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1))
bias_1 = self.create_parameter(
attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer(
initializer=paddle.nn.initializer.Uniform(
low=-self._init_scale, high=self._init_scale
)
),
shape=[self._hidden_size * 4],
dtype="float32",
default_initializer=fluid.initializer.Constant(0.0),
default_initializer=paddle.nn.initializer.Constant(0.0),
)
self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1))
......@@ -158,7 +158,7 @@ class PtbModel(fluid.Layer):
sparse=False,
weight_attr=fluid.ParamAttr(
name='embedding_para',
initializer=fluid.initializer.UniformInitializer(
initializer=paddle.nn.initializer.Uniform(
low=-init_scale, high=init_scale
),
),
......@@ -167,7 +167,7 @@ class PtbModel(fluid.Layer):
attr=fluid.ParamAttr(),
shape=[self.hidden_size, self.vocab_size],
dtype="float32",
default_initializer=fluid.initializer.UniformInitializer(
default_initializer=paddle.nn.initializer.Uniform(
low=-self.init_scale, high=self.init_scale
),
)
......@@ -175,7 +175,7 @@ class PtbModel(fluid.Layer):
attr=fluid.ParamAttr(),
shape=[self.vocab_size],
dtype="float32",
default_initializer=fluid.initializer.UniformInitializer(
default_initializer=paddle.nn.initializer.Uniform(
low=-self.init_scale, high=self.init_scale
),
)
......
......@@ -194,7 +194,7 @@ class ResNet(fluid.dygraph.Layer):
self.pool2d_avg_output,
class_dim,
weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv)
initializer=paddle.nn.initializer.Uniform(-stdv, stdv)
),
)
......
......@@ -131,7 +131,7 @@ class SqueezeExcitation(fluid.dygraph.Layer):
num_channels,
num_channels // reduction_ratio,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv)
initializer=paddle.nn.initializer.Uniform(-stdv, stdv)
),
)
stdv = 1.0 / math.sqrt(num_channels / 16.0 * 1.0)
......@@ -139,7 +139,7 @@ class SqueezeExcitation(fluid.dygraph.Layer):
num_channels // reduction_ratio,
num_channels,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv)
initializer=paddle.nn.initializer.Uniform(-stdv, stdv)
),
)
......@@ -316,7 +316,7 @@ class SeResNeXt(fluid.dygraph.Layer):
self.pool2d_avg_output,
class_dim,
weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv)
initializer=paddle.nn.initializer.Uniform(-stdv, stdv)
),
)
......
......@@ -230,7 +230,7 @@ class SkipGram(fluid.dygraph.Layer):
self.embedding_size,
weight_attr=fluid.ParamAttr(
name='embedding_para',
initializer=fluid.initializer.UniformInitializer(
initializer=paddle.nn.initializer.Uniform(
low=-0.5 / self.embedding_size,
high=0.5 / self.embedding_size,
),
......@@ -242,7 +242,7 @@ class SkipGram(fluid.dygraph.Layer):
self.embedding_size,
weight_attr=fluid.ParamAttr(
name='embedding_out_para',
initializer=fluid.initializer.UniformInitializer(
initializer=paddle.nn.initializer.Uniform(
low=-0.5 / self.embedding_size,
high=0.5 / self.embedding_size,
),
......
......@@ -62,10 +62,10 @@ class PrePostProcessLayer(Layer):
paddle.nn.LayerNorm(
normalized_shape=d_model,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.0)
initializer=paddle.nn.initializer.Constant(1.0)
),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.0)
initializer=paddle.nn.initializer.Constant(0.0)
),
),
)
......@@ -295,7 +295,7 @@ class Embedder(Layer):
vocab_size,
emb_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(0.0, emb_dim**-0.5)
initializer=paddle.nn.initializer.Normal(0.0, emb_dim**-0.5)
),
)
......@@ -330,7 +330,7 @@ class WrapEncoder(Layer):
max_length,
self.emb_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(
initializer=paddle.nn.initializer.Assign(
position_encoding_init(max_length, self.emb_dim)
),
trainable=False,
......@@ -522,7 +522,7 @@ class WrapDecoder(Layer):
max_length,
self.emb_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(
initializer=paddle.nn.initializer.Assign(
position_encoding_init(max_length, self.emb_dim)
),
trainable=False,
......
......@@ -253,10 +253,10 @@ class YOLOv3(fluid.dygraph.Layer):
stride=1,
padding=0,
weight_attr=ParamAttr(
initializer=fluid.initializer.Normal(0.0, 0.02)
initializer=paddle.nn.initializer.Normal(0.0, 0.02)
),
bias_attr=ParamAttr(
initializer=fluid.initializer.Constant(0.0),
initializer=paddle.nn.initializer.Constant(0.0),
regularizer=L2Decay(0.0),
),
),
......
......@@ -70,7 +70,7 @@ def net(batch_size=4, lr=0.01):
size=[dnn_input_dim, dnn_layer_dims[0]],
param_attr=fluid.ParamAttr(
name="deep_embedding",
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
),
is_sparse=True,
)
......@@ -86,7 +86,7 @@ def net(batch_size=4, lr=0.01):
size=[lr_input_dim, 1],
param_attr=fluid.ParamAttr(
name="wide_embedding",
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
),
is_sparse=True,
)
......@@ -99,7 +99,7 @@ def net(batch_size=4, lr=0.01):
size=dim,
activation="relu",
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)
initializer=paddle.nn.initializer.Constant(value=0.01)
),
name='dnn-fc-%d' % i,
)
......
......@@ -27,7 +27,6 @@ from paddle.fluid.framework import (
Operator,
convert_np_dtype_to_dtype_,
)
from paddle.fluid.initializer import NumpyArrayInitializer
from paddle.static.quantization import (
QuantizationFreezePass,
QuantizationTransformPass,
......@@ -305,7 +304,7 @@ def create_fake_model(program_config):
shape=tensor_config.shape,
type=core.VarDesc.VarType.LOD_TENSOR,
name=name,
initializer=NumpyArrayInitializer(tensor_config.data),
initializer=paddle.nn.initializer.Assign(tensor_config.data),
)
in_vars = []
for name in sorted(save_var_map.keys()):
......
......@@ -30,7 +30,7 @@ class ConvBiasMkldnnFusePassSamePadTest(InferencePassTest):
name="data", shape=[-1, 3, 100, 100], dtype="float32"
)
param_attr = fluid.ParamAttr(
initializer=fluid.initializer.Xavier(uniform=False),
initializer=paddle.nn.initializer.XavierNormal(),
learning_rate=0.001,
)
conv_out = paddle.static.nn.conv2d(
......@@ -63,7 +63,7 @@ class ConvBiasMkldnnFusePassValidPadTest(ConvBiasMkldnnFusePassSamePadTest):
name="data", shape=[-1, 3, 100, 100], dtype="float32"
)
param_attr = fluid.ParamAttr(
initializer=fluid.initializer.Xavier(uniform=False),
initializer=paddle.nn.initializer.XavierNormal(),
learning_rate=0.001,
)
conv_out = paddle.static.nn.conv2d(
......@@ -89,7 +89,7 @@ class ConvBiasMkldnnFusePassExplictPadTest(ConvBiasMkldnnFusePassSamePadTest):
name="data", shape=[-1, 3, 100, 100], dtype="float32"
)
param_attr = fluid.ParamAttr(
initializer=fluid.initializer.Xavier(uniform=False),
initializer=paddle.nn.initializer.XavierNormal(),
learning_rate=0.001,
)
conv_out = paddle.static.nn.conv2d(
......@@ -114,7 +114,7 @@ class ConvBiasMkldnnFusePassGroupTest(ConvBiasMkldnnFusePassSamePadTest):
name="data", shape=[-1, 3, 100, 100], dtype="float32"
)
param_attr = fluid.ParamAttr(
initializer=fluid.initializer.Xavier(uniform=False),
initializer=paddle.nn.initializer.XavierNormal(),
learning_rate=0.001,
)
conv_out = paddle.static.nn.conv2d(
......@@ -145,7 +145,7 @@ class ConvBiasMkldnnFusePassDialtionsGroupsTest(
name="data", shape=[-1, 3, 100, 100], dtype="float32"
)
param_attr = fluid.ParamAttr(
initializer=fluid.initializer.Xavier(uniform=False),
initializer=paddle.nn.initializer.XavierNormal(),
learning_rate=0.001,
)
conv_out = paddle.static.nn.conv2d(
......@@ -173,7 +173,7 @@ class ConvTransposeMkldnnFusePassDialtionsGroupsTest(InferencePassTest):
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(name="data", shape=[-1, 3, 5, 5], dtype="float32")
param_attr = fluid.ParamAttr(
initializer=fluid.initializer.Xavier(uniform=False),
initializer=paddle.nn.initializer.XavierNormal(),
learning_rate=0.001,
)
conv_out = paddle.static.nn.conv2d_transpose(
......
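Note how the boolean flag on the old API becomes a class choice here: fluid.initializer.Xavier(uniform=False) corresponds to paddle.nn.initializer.XavierNormal(), and uniform=True to XavierUniform(). A short sketch of both variants:

    import paddle

    # Old: fluid.initializer.Xavier(uniform=False) -> normal-distribution variant
    normal_attr = paddle.ParamAttr(
        initializer=paddle.nn.initializer.XavierNormal(),
        learning_rate=0.001,
    )
    # Old: fluid.initializer.Xavier(uniform=True) -> uniform-distribution variant
    uniform_attr = paddle.ParamAttr(
        initializer=paddle.nn.initializer.XavierUniform(),
        learning_rate=0.001,
    )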
......@@ -180,11 +180,11 @@ class TensorRTSubgraphPassInstanceNormTest(InferencePassTest):
)
param_attr = fluid.ParamAttr(
name='instance_norm_w',
initializer=fluid.initializer.Constant(value=1.0),
initializer=paddle.nn.initializer.Constant(value=1.0),
)
bias_attr = fluid.ParamAttr(
name='instance_norm_b',
initializer=fluid.initializer.Constant(value=0.0),
initializer=paddle.nn.initializer.Constant(value=0.0),
)
out = paddle.static.nn.instance_norm(
input=data, param_attr=param_attr, bias_attr=bias_attr
......
......@@ -114,11 +114,11 @@ class TestBatchNorm(unittest.TestCase):
shape[1],
is_test=is_test,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.0),
initializer=paddle.nn.initializer.Constant(1.0),
trainable=False,
),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.0),
initializer=paddle.nn.initializer.Constant(0.0),
trainable=False,
),
trainable_statistics=trainable_statistics,
......@@ -262,7 +262,7 @@ class TestBatchNormUseGlobalStats(unittest.TestCase):
net1 = paddle.nn.BatchNorm(
6,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.0)
initializer=paddle.nn.initializer.Constant(1.0)
),
use_global_stats=self.use_global_stats,
trainable_statistics=self.trainable_statistics,
......
......@@ -322,12 +322,12 @@ class TestNetWithEpsilonTensor(unittest.TestCase):
label_np = np.random.randint(2, size=(2, 1)).astype('int64')
weight_attr1 = paddle.ParamAttr(
name="weight1",
initializer=fluid.initializer.Constant(value=1.0),
initializer=paddle.nn.initializer.Constant(value=1.0),
trainable=True,
)
weight_attr2 = paddle.ParamAttr(
name="weight2",
initializer=fluid.initializer.Constant(value=2.0),
initializer=paddle.nn.initializer.Constant(value=2.0),
trainable=True,
)
clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
......
......@@ -298,7 +298,7 @@ class TestRunProgramOpWithFC(RunProgramNPUOpTest):
weight_attr = fluid.ParamAttr(
name=self.input_names['Params'][0],
learning_rate=0.5,
initializer=fluid.initializer.NumpyArrayInitializer(
initializer=paddle.nn.initializer.Assign(
self.inputs['Params'][self.input_names['Params'][0]]
),
trainable=True,
......@@ -306,7 +306,7 @@ class TestRunProgramOpWithFC(RunProgramNPUOpTest):
bias_attr = fluid.ParamAttr(
name=self.input_names['Params'][1],
learning_rate=0.5,
initializer=fluid.initializer.NumpyArrayInitializer(
initializer=paddle.nn.initializer.Assign(
self.inputs['Params'][self.input_names['Params'][1]]
),
trainable=True,
......
......@@ -41,7 +41,7 @@ class SimpleNet(fluid.Layer):
self.hidden_size,
sparse=is_sparse,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer(
initializer=paddle.nn.initializer.Uniform(
low=-init_scale, high=init_scale
)
),
......@@ -50,7 +50,7 @@ class SimpleNet(fluid.Layer):
attr=fluid.ParamAttr(),
shape=[self.hidden_size, self.vocab_size],
dtype=dtype,
default_initializer=fluid.initializer.UniformInitializer(
default_initializer=paddle.nn.initializer.Uniform(
low=-self.init_scale, high=self.init_scale
),
)
......@@ -58,7 +58,7 @@ class SimpleNet(fluid.Layer):
attr=fluid.ParamAttr(),
shape=[self.vocab_size],
dtype=dtype,
default_initializer=fluid.initializer.UniformInitializer(
default_initializer=paddle.nn.initializer.Uniform(
low=-self.init_scale, high=self.init_scale
),
)
......
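The Uniform migration can be exercised in isolation; a hedged sketch with made-up dimensions, using create_parameter on a paddle.nn.Layer as the hunks above do:

    import paddle

    class TinyNet(paddle.nn.Layer):
        def __init__(self, init_scale=0.1):
            super().__init__()
            # Uniform(low, high) replaces fluid.initializer.UniformInitializer
            self.softmax_weight = self.create_parameter(
                shape=[64, 1000],
                dtype='float32',
                default_initializer=paddle.nn.initializer.Uniform(
                    low=-init_scale, high=init_scale
                ),
            )

    net = TinyNet()
    print(net.softmax_weight.shape)  # [64, 1000]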
......@@ -26,7 +26,7 @@ def simple_fc_net_with_inputs(img, label, class_num=10):
size=100,
activation='relu',
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=1.0)
initializer=paddle.nn.initializer.Constant(value=1.0)
),
)
prediction = paddle.static.nn.fc(
......@@ -53,7 +53,7 @@ def batchnorm_fc_with_inputs(img, label, class_num=10):
size=200,
activation='relu',
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=1.0)
initializer=paddle.nn.initializer.Constant(value=1.0)
),
)
......
......@@ -25,11 +25,9 @@ paddle.enable_static()
def get_param_attr(weight, bias):
weight_attr = paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(weight)
)
bias_attr = paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(bias)
initializer=paddle.nn.initializer.Assign(weight)
)
bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(bias))
return weight_attr, bias_attr
......
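The rewritten helper behaves as before and can be checked directly with numpy inputs; a usage sketch with illustrative shapes:

    import numpy as np
    import paddle

    def get_param_attr(weight, bias):
        weight_attr = paddle.ParamAttr(
            initializer=paddle.nn.initializer.Assign(weight)
        )
        bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(bias))
        return weight_attr, bias_attr

    w = np.ones((4, 4), dtype='float32')
    b = np.zeros((4,), dtype='float32')
    w_attr, b_attr = get_param_attr(w, b)
    fc = paddle.nn.Linear(4, 4, weight_attr=w_attr, bias_attr=b_attr)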
......@@ -30,11 +30,9 @@ OUT_SIZE = 2 * MODEL_PARALLEL_SIZE
def get_param_attr(weight, bias):
weight_attr = paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(weight)
)
bias_attr = paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(bias)
initializer=paddle.nn.initializer.Assign(weight)
)
bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(bias))
return weight_attr, bias_attr
......
......@@ -25,11 +25,9 @@ paddle.enable_static()
def get_param_attr(weight, bias):
weight_attr = paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(weight)
)
bias_attr = paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(bias)
initializer=paddle.nn.initializer.Assign(weight)
)
bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(bias))
return weight_attr, bias_attr
......
......@@ -767,12 +767,12 @@ class TestAdamOptimizer(unittest.TestCase):
label_np = np.random.randint(2, size=(2, 1)).astype('int64')
weight_attr1 = paddle.ParamAttr(
name="weight1",
initializer=fluid.initializer.Constant(value=1.0),
initializer=paddle.nn.initializer.Constant(value=1.0),
trainable=True,
)
weight_attr2 = paddle.ParamAttr(
name="weight2",
initializer=fluid.initializer.Constant(value=2.0),
initializer=paddle.nn.initializer.Constant(value=2.0),
trainable=True,
)
clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
......@@ -926,7 +926,7 @@ class TestAdamOptimizer(unittest.TestCase):
main = fluid.Program()
weight_attr = paddle.ParamAttr(
name="weight1",
initializer=fluid.initializer.Constant(value=1.0),
initializer=paddle.nn.initializer.Constant(value=1.0),
regularizer=fluid.regularizer.L1DecayRegularizer(
regularization_coeff=0.1
),
......
......@@ -39,7 +39,6 @@ from paddle.distributed.auto_parallel.partitioner import Partitioner
from paddle.distributed.auto_parallel.reshard import Resharder
from paddle.distributed.fleet import auto
from paddle.fluid import core
from paddle.fluid.initializer import NumpyArrayInitializer
if os.getenv("CUDA_VISIBLE_DEVICES") is not None:
os.environ["CUDA_VISIBLE_DEVICES"] = ""
......@@ -373,10 +372,18 @@ class MLPLayer(nn.Layer):
arr1 = np.random.normal(0, 0.02, size=(dim_feedforward, d_model))
arr2 = np.random.normal(0, 0.02, size=(d_model, dim_feedforward))
arr3 = np.random.normal(0, 0.02, size=(dim_feedforward, d_model))
weight_attr0 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr0))
weight_attr1 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr1))
weight_attr2 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr2))
weight_attr3 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr3))
weight_attr0 = paddle.ParamAttr(
initializer=paddle.nn.initializer.Assign(arr0)
)
weight_attr1 = paddle.ParamAttr(
initializer=paddle.nn.initializer.Assign(arr1)
)
weight_attr2 = paddle.ParamAttr(
initializer=paddle.nn.initializer.Assign(arr2)
)
weight_attr3 = paddle.ParamAttr(
initializer=paddle.nn.initializer.Assign(arr3)
)
bias_attr = None
self.linear0 = nn.Linear(
d_model, dim_feedforward, weight_attr0, bias_attr=bias_attr
......
......@@ -14,6 +14,7 @@
import unittest
import paddle
import paddle.fluid as fluid
......@@ -22,7 +23,7 @@ class TestAvoidTwiceInitialization(unittest.TestCase):
cur_program = fluid.Program()
cur_block = cur_program.current_block()
var = cur_block.create_parameter(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
shape=[2, 2],
dtype='float32',
name='var_a',
......@@ -40,7 +41,7 @@ class TestAvoidTwiceInitialization(unittest.TestCase):
attrs={'ring_id': 0},
)
var2 = cur_block.create_parameter(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
shape=[2, 2],
dtype='float32',
name='var_a',
......
......@@ -26,7 +26,7 @@ class L1(fluid.Layer):
def __init__(self):
super().__init__()
self._param_attr = fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)
initializer=paddle.nn.initializer.Constant(value=0.1)
)
self.w1 = self.create_parameter(
attr=self._param_attr, shape=[2, 2], dtype='float32', is_bias=False
......
......@@ -183,11 +183,11 @@ class TestBatchNorm(unittest.TestCase):
shape[1],
is_test=is_test,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.0),
initializer=paddle.nn.initializer.Constant(1.0),
trainable=False,
),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.0),
initializer=paddle.nn.initializer.Constant(0.0),
trainable=False,
),
trainable_statistics=trainable_statistics,
......@@ -378,7 +378,7 @@ class TestBatchNormUseGlobalStats(unittest.TestCase):
net1 = paddle.nn.BatchNorm(
6,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.0)
initializer=paddle.nn.initializer.Constant(1.0)
),
use_global_stats=self.use_global_stats,
trainable_statistics=self.trainable_statistics,
......
......@@ -48,7 +48,7 @@ class TestDoubleGrad(unittest.TestCase):
name='x',
shape=[1],
dtype='float32',
default_initializer=fluid.initializer.Constant(3),
default_initializer=paddle.nn.initializer.Constant(3),
)
(grad1,) = fluid.gradients(net(x), x) # 2x = 6
z = net(x - grad1)
......@@ -69,7 +69,7 @@ class TestDoubleGrad(unittest.TestCase):
name='x',
shape=[1],
dtype='float32',
default_initializer=fluid.initializer.Constant(1),
default_initializer=paddle.nn.initializer.Constant(1),
)
y = x * x
(dx1,) = fluid.gradients(y, x)
......
......@@ -41,7 +41,7 @@ class TestCommunicatorGeoEnd2End(unittest.TestCase):
size=[10000, 10],
param_attr=fluid.ParamAttr(
name="embedding",
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
),
is_sparse=True,
)
......
......@@ -18,7 +18,6 @@ import numpy as np
import paddle
import paddle.fluid.dygraph as dg
import paddle.fluid.initializer as I
import paddle.nn.functional as F
from paddle import fluid, nn
......@@ -110,11 +109,11 @@ class Conv2DTestCase(unittest.TestCase):
else (-1, self.num_channels, -1, -1)
)
x_var = fluid.data("input", input_shape, dtype=self.dtype)
weight_attr = I.NumpyArrayInitializer(self.weight)
weight_attr = paddle.nn.initializer.Assign(self.weight)
if self.bias is None:
bias_attr = False
else:
bias_attr = I.NumpyArrayInitializer(self.bias)
bias_attr = paddle.nn.initializer.Assign(self.bias)
if self.padding_mode != 'zeros':
x_var = F.pad(
x_var,
......
......@@ -18,7 +18,6 @@ import numpy as np
import paddle
import paddle.fluid.dygraph as dg
import paddle.fluid.initializer as I
import paddle.nn.functional as F
from paddle import fluid, nn
......@@ -101,11 +100,11 @@ class Conv2DTransposeTestCase(unittest.TestCase):
else (-1, self.num_channels, -1, -1)
)
x_var = fluid.data("input", input_shape, dtype=self.dtype)
weight_attr = I.NumpyArrayInitializer(self.weight)
weight_attr = paddle.nn.initializer.Assign(self.weight)
if self.bias is None:
bias_attr = False
else:
bias_attr = I.NumpyArrayInitializer(self.bias)
bias_attr = paddle.nn.initializer.Assign(self.bias)
y_var = paddle.static.nn.conv2d_transpose(
x_var,
......
......@@ -18,7 +18,6 @@ import numpy as np
import paddle
import paddle.fluid.dygraph as dg
import paddle.fluid.initializer as I
import paddle.nn.functional as F
from paddle import fluid, nn
......@@ -97,11 +96,11 @@ class Conv3DTestCase(unittest.TestCase):
else (-1, self.num_channels, -1, -1, -1)
)
x_var = fluid.data("input", input_shape, dtype=self.dtype)
weight_attr = I.NumpyArrayInitializer(self.weight)
weight_attr = paddle.nn.initializer.Assign(self.weight)
if self.bias is None:
bias_attr = False
else:
bias_attr = I.NumpyArrayInitializer(self.bias)
bias_attr = paddle.nn.initializer.Assign(self.bias)
y_var = paddle.static.nn.conv3d(
x_var,
self.num_filters,
......
......@@ -18,7 +18,6 @@ import numpy as np
import paddle
import paddle.fluid.dygraph as dg
import paddle.fluid.initializer as I
import paddle.nn.functional as F
from paddle import fluid, nn
......@@ -99,11 +98,11 @@ class Conv3DTransposeTestCase(unittest.TestCase):
else (-1, self.num_channels, -1, -1, -1)
)
x_var = fluid.data("input", input_shape, dtype=self.dtype)
weight_attr = I.NumpyArrayInitializer(self.weight)
weight_attr = paddle.nn.initializer.Assign(self.weight)
if self.bias is None:
bias_attr = False
else:
bias_attr = I.NumpyArrayInitializer(self.bias)
bias_attr = paddle.nn.initializer.Assign(self.bias)
y_var = paddle.static.nn.conv3d_transpose(
x_var,
self.num_filters,
......
......@@ -120,15 +120,15 @@ class TestGeneratorSeed(unittest.TestCase):
result_1 = paddle.static.nn.fc(
x,
size=10,
weight_attr=fluid.initializer.TruncatedNormal(
loc=0.0, scale=2.0
weight_attr=paddle.nn.initializer.TruncatedNormal(
mean=0.0, std=2.0
),
)
result_2 = paddle.static.nn.fc(
x,
size=10,
weight_attr=fluid.initializer.TruncatedNormal(
loc=0.0, scale=2.0
weight_attr=paddle.nn.initializer.TruncatedNormal(
mean=0.0, std=2.0
),
)
......
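Beyond the module path, the keyword names change in this hunk as well: the fluid-era loc/scale arguments become mean/std on paddle.nn.initializer.TruncatedNormal. A minimal static-graph sketch with illustrative sizes:

    import paddle

    paddle.enable_static()
    x = paddle.static.data(name='x', shape=[None, 20], dtype='float32')
    out = paddle.static.nn.fc(
        x,
        size=10,
        # loc=0.0, scale=2.0 on the old API -> mean=0.0, std=2.0
        weight_attr=paddle.nn.initializer.TruncatedNormal(mean=0.0, std=2.0),
    )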
......@@ -61,7 +61,7 @@ def simple_fc_net(places, use_legacy_py_reader, use_double_buffer):
size=hidden_size,
activation='tanh',
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=1.0)
initializer=paddle.nn.initializer.Constant(value=1.0)
),
)
......
......@@ -58,9 +58,7 @@ def cnn_model(data):
size=SIZE,
activation="softmax",
weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=scale
)
initializer=paddle.nn.initializer.Normal(mean=0.0, std=scale)
),
)
return predict
......
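The same rename applies to Normal: paddle.nn.initializer.Normal takes mean and std rather than the old loc and scale, which is why the replacement line above is written with mean/std. A quick sketch:

    import paddle

    attr = paddle.ParamAttr(
        initializer=paddle.nn.initializer.Normal(mean=0.0, std=0.01)
    )
    fc = paddle.nn.Linear(8, 2, weight_attr=attr)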
......@@ -120,7 +120,7 @@ class Test_Detach(unittest.TestCase):
initializer=paddle.nn.initializer.Constant(5.0)
)
linear_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(6.0)
initializer=paddle.nn.initializer.Constant(6.0)
)
linear = Linear(
4,
......@@ -132,7 +132,7 @@ class Test_Detach(unittest.TestCase):
initializer=paddle.nn.initializer.Constant(7.0)
)
linear1_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(8.0)
initializer=paddle.nn.initializer.Constant(8.0)
)
linear1 = Linear(
10,
......
......@@ -58,7 +58,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase):
size=[1000000000, 100000],
param_attr=paddle.fluid.ParamAttr(
name="embedding",
initializer=paddle.fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
),
is_sparse=True,
)
......
......@@ -89,7 +89,7 @@ class TestDistFleetHeterProgram(unittest.TestCase):
size=[100001, 10],
param_attr=fluid.ParamAttr(
name="SparseFeatFactors",
initializer=fluid.initializer.Uniform(),
initializer=paddle.nn.initializer.Uniform(),
),
)
......@@ -103,8 +103,8 @@ class TestDistFleetHeterProgram(unittest.TestCase):
size=400,
activation="relu",
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(concated.shape[1])
initializer=paddle.nn.initializer.Normal(
std=1 / math.sqrt(concated.shape[1])
)
),
name="fc1",
......@@ -116,8 +116,8 @@ class TestDistFleetHeterProgram(unittest.TestCase):
size=400,
activation="relu",
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(fc1.shape[1])
initializer=paddle.nn.initializer.Normal(
std=1 / math.sqrt(fc1.shape[1])
)
),
name="fc2",
......@@ -129,8 +129,8 @@ class TestDistFleetHeterProgram(unittest.TestCase):
size=400,
activation="relu",
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(fc2.shape[1])
initializer=paddle.nn.initializer.Normal(
std=1 / math.sqrt(fc2.shape[1])
)
),
name="fc3",
......@@ -142,8 +142,8 @@ class TestDistFleetHeterProgram(unittest.TestCase):
size=2,
activation="softmax",
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(fc3.shape[1])
initializer=paddle.nn.initializer.Normal(
std=1 / math.sqrt(fc3.shape[1])
)
),
)
......
......@@ -77,7 +77,7 @@ class TestPSMinimize(unittest.TestCase):
input=q,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
learning_rate=emb_lr,
),
......@@ -91,7 +91,7 @@ class TestPSMinimize(unittest.TestCase):
x=q_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__q_fc__",
learning_rate=base_lr,
),
......@@ -107,7 +107,7 @@ class TestPSMinimize(unittest.TestCase):
input=pt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
learning_rate=emb_lr,
),
......@@ -121,7 +121,7 @@ class TestPSMinimize(unittest.TestCase):
x=pt_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__",
learning_rate=base_lr,
),
......@@ -136,7 +136,7 @@ class TestPSMinimize(unittest.TestCase):
input=nt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
learning_rate=emb_lr,
),
......@@ -150,7 +150,7 @@ class TestPSMinimize(unittest.TestCase):
x=nt_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__",
learning_rate=base_lr,
),
......
......@@ -78,7 +78,7 @@ class TestPSPassWithBow(unittest.TestCase):
is_distributed=is_distributed,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
learning_rate=emb_lr,
),
......@@ -93,7 +93,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=q_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__q_fc__",
learning_rate=base_lr,
),
......@@ -110,7 +110,7 @@ class TestPSPassWithBow(unittest.TestCase):
is_distributed=is_distributed,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
learning_rate=emb_lr,
),
......@@ -125,7 +125,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=pt_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__",
learning_rate=base_lr,
),
......@@ -141,7 +141,7 @@ class TestPSPassWithBow(unittest.TestCase):
is_distributed=is_distributed,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
learning_rate=emb_lr,
),
......@@ -156,7 +156,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=nt_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__",
learning_rate=base_lr,
),
......
......@@ -77,7 +77,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=q,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
learning_rate=emb_lr,
),
......@@ -91,7 +91,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=q_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__q_fc__",
learning_rate=base_lr,
),
......@@ -107,7 +107,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=pt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
learning_rate=emb_lr,
),
......@@ -121,7 +121,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=pt_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__",
learning_rate=base_lr,
),
......@@ -136,7 +136,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=nt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
learning_rate=emb_lr,
),
......@@ -150,7 +150,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=nt_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__",
learning_rate=base_lr,
),
......
......@@ -80,7 +80,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=q,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
learning_rate=emb_lr,
),
......@@ -94,7 +94,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=q_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__q_fc__",
learning_rate=base_lr,
),
......@@ -110,7 +110,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=pt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
learning_rate=emb_lr,
),
......@@ -124,7 +124,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=pt_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__",
learning_rate=base_lr,
),
......@@ -139,7 +139,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=nt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
learning_rate=emb_lr,
),
......@@ -153,7 +153,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=nt_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__",
learning_rate=base_lr,
),
......
......@@ -81,7 +81,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=q,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
learning_rate=emb_lr,
),
......@@ -95,7 +95,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=q_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__q_fc__",
learning_rate=base_lr,
),
......@@ -111,7 +111,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=pt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
learning_rate=emb_lr,
),
......@@ -125,7 +125,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=pt_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__",
learning_rate=base_lr,
),
......@@ -140,7 +140,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=nt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
learning_rate=emb_lr,
),
......@@ -154,7 +154,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=nt_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__",
learning_rate=base_lr,
),
......
......@@ -80,7 +80,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=q,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
learning_rate=emb_lr,
),
......@@ -95,7 +95,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=q_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__q_fc__",
learning_rate=base_lr,
),
......@@ -111,7 +111,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=pt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
learning_rate=emb_lr,
),
......@@ -125,7 +125,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=pt_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__",
learning_rate=base_lr,
),
......@@ -140,7 +140,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=nt,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
learning_rate=emb_lr,
),
......@@ -154,7 +154,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=nt_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__",
learning_rate=base_lr,
),
......
......@@ -78,7 +78,7 @@ class TestPSPassWithBow(unittest.TestCase):
is_distributed=is_distributed,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
learning_rate=emb_lr,
),
......@@ -93,7 +93,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=q_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__q_fc__",
learning_rate=base_lr,
),
......@@ -110,7 +110,7 @@ class TestPSPassWithBow(unittest.TestCase):
is_distributed=is_distributed,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
learning_rate=emb_lr,
),
......@@ -125,7 +125,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=pt_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__",
learning_rate=base_lr,
),
......@@ -141,7 +141,7 @@ class TestPSPassWithBow(unittest.TestCase):
is_distributed=is_distributed,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
learning_rate=emb_lr,
),
......@@ -156,7 +156,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=nt_ss,
size=hid_dim,
weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01),
initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__",
learning_rate=base_lr,
),
......
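Taken together, the call-site rewrites in the hunks above follow one fixed mapping; a reference summary of the classes that appear in this part of the diff, with keyword renames noted in parentheses:

    # fluid.initializer.*              ->  paddle.nn.initializer.*
    # Constant / ConstantInitializer   ->  Constant
    # Uniform / UniformInitializer     ->  Uniform
    # Normal / NormalInitializer       ->  Normal           (loc/scale -> mean/std)
    # TruncatedNormal                  ->  TruncatedNormal  (loc/scale -> mean/std)
    # Xavier(uniform=True)             ->  XavierUniform
    # Xavier(uniform=False)            ->  XavierNormal
    # NumpyArrayInitializer            ->  Assign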