Unverified  Commit 6edc7bba  authored by zqw_1997, committed by GitHub

remove fluid.initializer.UniformInitializer, ConstantInitializer,...

remove fluid.initializer.UniformInitializer, ConstantInitializer, NormalInitializer, TruncatedNormalInitializer, XavierInitializer, BilinearInitializer, MSRAInitializer, NumpyArrayInitializer and calculate_gain. (#49498)

* move UniformInitializer and ConstantInitializer

* more modify

* circular import resolved

* another circular import resolved?

* more circular import 2

* circular import 3

* change import paddle in metric.py

* BuildStrategy import from fluid

* modify the framework import path in common.py

* change rnn.py import, from static to original framework

* change import static in the nn folder

* default_main_program should import from common_ops_import

* add import paddle in param_attr.py

* use core not paddle module for using VarDesc

* another old uniform

* fix mistake of using Uniform instead of UniformInitializer

* modify UniformInitializer doc

* move fluid.NormalInitializer to nn.initializer.NormalInitializer

* remove import of Normal in fluid.layers.nn.py

* remove more import of old Normal

* remove more import of old Normal

* modify sample code and test imports

* is_listen_failed: the argument passed should be the log file

* problem solved

* fix a mistake

* comments resolved and remove paddle.fluid.initializer.TruncatedNormalInitializer

* remove paddle.fluid.initializer.XavierInitializer and paddle.fluid.initializer.MSRAInitializer

* remove paddle.fluid.initializer.BilinearInitializer NumpyArrayInitializer and set_global_initializer

* change fluid to static

* change static to fluid to avoid circular import in distributed_strategy.py

* fix example code and test_initializer

* ValueType

* sample code fix

* change set_global_initializer back to fluid

* put paddle.static.BuildStrategy.ReduceStrategy into the function to avoid circular import

* remove calculate_gain, delete BilinearInitializer and revert set_global_initializer

* reduce direct uses of UniformInitializer, ConstantInitializer, NormalInitializer, TruncatedNormalInitializer, XavierInitializer, MSRAInitializer and NumpyArrayInitializer to as few as possible

* fix incompatible argument

* fix more incompatible arguments

* fix test_prelu_op_xpu.py Constant

* fix inaccurate doc

* more doc fixes: default values
Parent 2b848aef
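For reference, the sketch below summarizes the replacement API this PR migrates to. It is a minimal, illustrative example; the old-to-new mapping is inferred from the call sites changed in the diff, and the layer and variable names are not part of this PR:

    import numpy as np
    import paddle

    # Assumed mapping, based on the call sites changed below:
    #   fluid.initializer.Constant / ConstantInitializer -> paddle.nn.initializer.Constant
    #   fluid.initializer.Uniform / UniformInitializer   -> paddle.nn.initializer.Uniform
    #   fluid.initializer.Normal / NormalInitializer     -> paddle.nn.initializer.Normal
    #   fluid.initializer.NumpyArrayInitializer          -> paddle.nn.initializer.Assign
    #   fluid.initializer.Xavier / XavierInitializer     -> paddle.nn.initializer.XavierUniform
    weight = np.random.normal(0, 0.02, size=(8, 4)).astype("float32")
    linear = paddle.nn.Linear(
        8,
        4,
        weight_attr=paddle.ParamAttr(
            initializer=paddle.nn.initializer.Assign(weight)
        ),
        bias_attr=paddle.ParamAttr(
            initializer=paddle.nn.initializer.Constant(0.0)
        ),
    )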
...@@ -32,7 +32,6 @@ from paddle.fluid.framework import ( # noqa: F401 ...@@ -32,7 +32,6 @@ from paddle.fluid.framework import ( # noqa: F401
dygraph_only, dygraph_only,
in_dygraph_mode, in_dygraph_mode,
) )
from paddle.fluid.initializer import Constant # noqa: F401
from paddle.fluid.layer_helper import LayerHelper # noqa: F401 from paddle.fluid.layer_helper import LayerHelper # noqa: F401
from paddle.fluid.layers import fill_constant, utils # noqa: F401 from paddle.fluid.layers import fill_constant, utils # noqa: F401
from paddle.fluid.layers.layer_function_generator import ( # noqa: F401 from paddle.fluid.layers.layer_function_generator import ( # noqa: F401
......
...@@ -104,7 +104,6 @@ class DistributedJobInfo: ...@@ -104,7 +104,6 @@ class DistributedJobInfo:
self.job_info.strategy = dist_strategy self.job_info.strategy = dist_strategy
ReduceStrategyFluid = paddle.static.BuildStrategy.ReduceStrategy
ReduceStrategyFleet = int ReduceStrategyFleet = int
...@@ -261,7 +260,7 @@ class DistributedStrategy: ...@@ -261,7 +260,7 @@ class DistributedStrategy:
for f in fields: for f in fields:
value = getattr(self.strategy.build_strategy, f.name) value = getattr(self.strategy.build_strategy, f.name)
if f.name == 'reduce_strategy': if f.name == 'reduce_strategy':
value = ReduceStrategyFluid(value) value = paddle.static.BuildStrategy.ReduceStrategy(value)
setattr(build_strategy, f.name, value) setattr(build_strategy, f.name, value)
return build_strategy return build_strategy
......
...@@ -18,11 +18,11 @@ import numpy as np ...@@ -18,11 +18,11 @@ import numpy as np
import paddle import paddle
from paddle import _legacy_C_ops from paddle import _legacy_C_ops
from paddle.common_ops_import import Variable
from paddle.fluid import core from paddle.fluid import core
from paddle.fluid.data_feeder import check_variable_and_dtype from paddle.fluid.data_feeder import check_variable_and_dtype
from paddle.fluid.framework import in_dygraph_mode from paddle.fluid.framework import in_dygraph_mode
from paddle.framework import LayerHelper from paddle.framework import LayerHelper
from paddle.static import Variable
__all__ = [] __all__ = []
......
...@@ -171,7 +171,7 @@ class DGCMomentumOptimizer(Optimizer): ...@@ -171,7 +171,7 @@ class DGCMomentumOptimizer(Optimizer):
if is_new_var: if is_new_var:
helper.set_variable_initializer( helper.set_variable_initializer(
counter, counter,
initializer=paddle.fluid.initializer.Constant( initializer=paddle.nn.initializer.ConstantInitializer(
value=float(begin - 1), force_cpu=True value=float(begin - 1), force_cpu=True
), ),
) )
...@@ -194,7 +194,7 @@ class DGCMomentumOptimizer(Optimizer): ...@@ -194,7 +194,7 @@ class DGCMomentumOptimizer(Optimizer):
if is_new_var: if is_new_var:
helper.set_variable_initializer( helper.set_variable_initializer(
counter, counter,
initializer=paddle.fluid.initializer.Constant( initializer=paddle.nn.initializer.ConstantInitializer(
value=float(value), force_cpu=True value=float(value), force_cpu=True
), ),
) )
......
...@@ -18,7 +18,7 @@ import math ...@@ -18,7 +18,7 @@ import math
import numpy as np import numpy as np
import paddle import paddle
from paddle.static import Variable from paddle.common_ops_import import Variable
__all__ = [] __all__ = []
......
...@@ -586,7 +586,6 @@ class IpuDynamicPatcher: ...@@ -586,7 +586,6 @@ class IpuDynamicPatcher:
""" """
from ..fluid.dygraph.base import switch_to_static_graph from ..fluid.dygraph.base import switch_to_static_graph
from ..fluid import backward from ..fluid import backward
from ..fluid.initializer import Constant
from ..fluid.framework import device_guard from ..fluid.framework import device_guard
import paddle import paddle
...@@ -645,7 +644,10 @@ class IpuDynamicPatcher: ...@@ -645,7 +644,10 @@ class IpuDynamicPatcher:
device = optimizer._get_device_for_param(param_name) device = optimizer._get_device_for_param(param_name)
with device_guard(device): with device_guard(device):
optimizer.helper.set_variable_initializer( optimizer.helper.set_variable_initializer(
var, initializer=Constant(value=0.0) var,
initializer=paddle.nn.initializer.Constant(
value=0.0
),
) )
param_or_lr_tensor = scope.find_var( param_or_lr_tensor = scope.find_var(
var_tmp.name var_tmp.name
......
...@@ -17,7 +17,6 @@ Contrib layers just related to metric. ...@@ -17,7 +17,6 @@ Contrib layers just related to metric.
import warnings import warnings
from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.initializer import Normal, Constant
from paddle.fluid.framework import Variable from paddle.fluid.framework import Variable
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.layers import tensor from paddle.fluid.layers import tensor
...@@ -147,7 +146,10 @@ def ctr_metric_bundle(input, label, ins_tag_weight=None): ...@@ -147,7 +146,10 @@ def ctr_metric_bundle(input, label, ins_tag_weight=None):
local_ins_num, local_ins_num,
]: ]:
helper.set_variable_initializer( helper.set_variable_initializer(
var, Constant(value=0.0, force_cpu=True) var,
paddle.nn.initializer.ConstantInitializer(
value=0.0, force_cpu=True
),
) )
helper.append_op( helper.append_op(
......
...@@ -24,7 +24,6 @@ import paddle ...@@ -24,7 +24,6 @@ import paddle
from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.layers import utils from paddle.fluid.layers import utils
from ... import unique_name from ... import unique_name
from paddle.fluid.initializer import Normal, Constant, NumpyArrayInitializer
from paddle.fluid.data_feeder import ( from paddle.fluid.data_feeder import (
check_variable_and_dtype, check_variable_and_dtype,
check_type, check_type,
...@@ -896,8 +895,10 @@ def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'): ...@@ -896,8 +895,10 @@ def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
paddle.enable_static()
x = fluid.data(name="x", shape=[None, 1], dtype="int32", lod_level=1) x = fluid.data(name="x", shape=[None, 1], dtype="int32", lod_level=1)
tree_info = [[0,0,0,1,2], tree_info = [[0,0,0,1,2],
[0,1,0,3,4],[0,1,0,5,6], [0,1,0,3,4],[0,1,0,5,6],
...@@ -908,7 +909,7 @@ def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'): ...@@ -908,7 +909,7 @@ def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'):
child_nums = 2 child_nums = 2
child, leaf_mask = fluid.contrib.layers.tdm_child(x, node_nums, child_nums, child, leaf_mask = fluid.contrib.layers.tdm_child(x, node_nums, child_nums,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer( initializer=paddle.nn.initializer.Assign(
tree_info_np))) tree_info_np)))
place = fluid.CPUPlace() place = fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
...@@ -925,7 +926,7 @@ def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'): ...@@ -925,7 +926,7 @@ def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'):
attr=helper.param_attr, attr=helper.param_attr,
shape=[node_nums, 3 + child_nums], shape=[node_nums, 3 + child_nums],
dtype=dtype, dtype=dtype,
default_initializer=Constant(0), default_initializer=paddle.nn.initializer.Constant(0),
) )
tree_info.stop_gradient = True tree_info.stop_gradient = True
...@@ -1003,8 +1004,10 @@ def tdm_sampler( ...@@ -1003,8 +1004,10 @@ def tdm_sampler(
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
paddle.enable_static()
x = fluid.data(name="x", shape=[None, 1], dtype="int32", lod_level=1) x = fluid.data(name="x", shape=[None, 1], dtype="int32", lod_level=1)
travel_list = [[1, 3], [1, 4], [2, 5], [2, 6]] # leaf node's travel path, shape(leaf_node_num, layer_num) travel_list = [[1, 3], [1, 4], [2, 5], [2, 6]] # leaf node's travel path, shape(leaf_node_num, layer_num)
layer_list_flat = [[1], [2], [3], [4], [5], [6]] # shape(node_nums, 1) layer_list_flat = [[1], [2], [3], [4], [5], [6]] # shape(node_nums, 1)
...@@ -1022,10 +1025,10 @@ def tdm_sampler( ...@@ -1022,10 +1025,10 @@ def tdm_sampler(
layer_node_num_list, layer_node_num_list,
leaf_node_num, leaf_node_num,
tree_travel_attr=fluid.ParamAttr( tree_travel_attr=fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer( initializer=paddle.nn.initializer.Assign(
travel_array)), travel_array)),
tree_layer_attr=fluid.ParamAttr( tree_layer_attr=fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer( initializer=paddle.nn.initializer.Assign(
layer_array)), layer_array)),
output_positive=True, output_positive=True,
output_list=True, output_list=True,
...@@ -1089,7 +1092,7 @@ def tdm_sampler( ...@@ -1089,7 +1092,7 @@ def tdm_sampler(
attr=tree_travel_attr, attr=tree_travel_attr,
shape=travel_shape, shape=travel_shape,
dtype=tree_dtype, dtype=tree_dtype,
default_initializer=Constant(0), default_initializer=paddle.nn.initializer.Constant(0),
) )
layer_shape = [node_nums, 1] layer_shape = [node_nums, 1]
...@@ -1097,7 +1100,7 @@ def tdm_sampler( ...@@ -1097,7 +1100,7 @@ def tdm_sampler(
attr=tree_layer_attr, attr=tree_layer_attr,
shape=layer_shape, shape=layer_shape,
dtype=tree_dtype, dtype=tree_dtype,
default_initializer=Constant(0), default_initializer=paddle.nn.initializer.Constant(0),
) )
out = helper.create_variable_for_type_inference(dtype=dtype) out = helper.create_variable_for_type_inference(dtype=dtype)
...@@ -1640,7 +1643,7 @@ def fused_bn_add_act( ...@@ -1640,7 +1643,7 @@ def fused_bn_add_act(
attr=helper.param_attr, attr=helper.param_attr,
shape=param_shape, shape=param_shape,
dtype=bn_param_dtype, dtype=bn_param_dtype,
default_initializer=Constant(1.0), default_initializer=paddle.nn.initializer.Constant(1.0),
) )
bias = helper.create_parameter( bias = helper.create_parameter(
attr=helper.bias_attr, attr=helper.bias_attr,
...@@ -1650,7 +1653,9 @@ def fused_bn_add_act( ...@@ -1650,7 +1653,9 @@ def fused_bn_add_act(
) )
mean = helper.create_parameter( mean = helper.create_parameter(
attr=ParamAttr( attr=ParamAttr(
name=moving_mean_name, initializer=Constant(0.0), trainable=False name=moving_mean_name,
initializer=paddle.nn.initializer.Constant(0.0),
trainable=False,
), ),
shape=param_shape, shape=param_shape,
dtype=bn_param_dtype, dtype=bn_param_dtype,
...@@ -1659,7 +1664,7 @@ def fused_bn_add_act( ...@@ -1659,7 +1664,7 @@ def fused_bn_add_act(
variance = helper.create_parameter( variance = helper.create_parameter(
attr=ParamAttr( attr=ParamAttr(
name=moving_variance_name, name=moving_variance_name,
initializer=Constant(1.0), initializer=paddle.nn.initializer.Constant(1.0),
trainable=False, trainable=False,
), ),
shape=param_shape, shape=param_shape,
...@@ -1723,13 +1728,16 @@ def pow2_decay_with_linear_warmup( ...@@ -1723,13 +1728,16 @@ def pow2_decay_with_linear_warmup(
helper = LayerHelper("pow2_decay_with_linear_warmup", **locals()) helper = LayerHelper("pow2_decay_with_linear_warmup", **locals())
lr = helper.create_global_variable(persistable=True, dtype=dtype, shape=[1]) lr = helper.create_global_variable(persistable=True, dtype=dtype, shape=[1])
helper.set_variable_initializer( helper.set_variable_initializer(
lr, Constant(value=float(base_lr) / warmup_steps) lr,
paddle.nn.initializer.Constant(value=float(base_lr) / warmup_steps),
) )
step = helper.create_global_variable( step = helper.create_global_variable(
persistable=True, dtype='int64', shape=[1] persistable=True, dtype='int64', shape=[1]
) )
helper.set_variable_initializer(step, Constant(value=0)) helper.set_variable_initializer(
step, paddle.nn.initializer.Constant(value=0)
)
assert ( assert (
warmup_steps <= total_steps warmup_steps <= total_steps
), "warmup_steps cannot be larger than total_steps" ), "warmup_steps cannot be larger than total_steps"
......
...@@ -20,7 +20,6 @@ from . import layers ...@@ -20,7 +20,6 @@ from . import layers
from .framework import Program, Variable, program_guard from .framework import Program, Variable, program_guard
from . import unique_name from . import unique_name
from .layer_helper import LayerHelper from .layer_helper import LayerHelper
from .initializer import Constant
def _clone_var_(block, var): def _clone_var_(block, var):
......
...@@ -109,7 +109,7 @@ def model(): ...@@ -109,7 +109,7 @@ def model():
size=[dnn_input_dim, dnn_layer_dims[0]], size=[dnn_input_dim, dnn_layer_dims[0]],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name="deep_embedding", name="deep_embedding",
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
), ),
is_sparse=True, is_sparse=True,
) )
...@@ -121,7 +121,7 @@ def model(): ...@@ -121,7 +121,7 @@ def model():
size=dim, size=dim,
activation="relu", activation="relu",
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
name='dnn-fc-%d' % i, name='dnn-fc-%d' % i,
) )
...@@ -134,7 +134,7 @@ def model(): ...@@ -134,7 +134,7 @@ def model():
size=[lr_input_dim, 1], size=[lr_input_dim, 1],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name="wide_embedding", name="wide_embedding",
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
), ),
is_sparse=True, is_sparse=True,
) )
......
This diff is collapsed.
...@@ -22,7 +22,6 @@ from .framework import ( ...@@ -22,7 +22,6 @@ from .framework import (
cpu_places, cpu_places,
) )
from .param_attr import ParamAttr from .param_attr import ParamAttr
from .initializer import Constant
from . import layers from . import layers
from . import backward from . import backward
from .dygraph import Layer from .dygraph import Layer
...@@ -42,7 +41,9 @@ class SimpleLayer(Layer): ...@@ -42,7 +41,9 @@ class SimpleLayer(Layer):
self._linear1 = paddle.nn.Linear( self._linear1 = paddle.nn.Linear(
input_size, input_size,
3, 3,
weight_attr=ParamAttr(initializer=Constant(value=0.1)), weight_attr=ParamAttr(
initializer=paddle.nn.initializer.Constant(value=0.1)
),
) )
def forward(self, inputs): def forward(self, inputs):
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
import copy import copy
import paddle
from .framework import ( from .framework import (
Parameter, Parameter,
dtype_is_floating, dtype_is_floating,
...@@ -22,7 +22,6 @@ from .framework import ( ...@@ -22,7 +22,6 @@ from .framework import (
_global_flags, _global_flags,
) )
from . import unique_name from . import unique_name
from paddle.fluid.initializer import Constant, Xavier
from .param_attr import ParamAttr from .param_attr import ParamAttr
from . import core from . import core
...@@ -178,10 +177,10 @@ class LayerHelper(LayerHelperBase): ...@@ -178,10 +177,10 @@ class LayerHelper(LayerHelperBase):
# TODO (jiabin): should we remove this since it has never be used # TODO (jiabin): should we remove this since it has never be used
def _get_default_initializer(self, dtype): def _get_default_initializer(self, dtype):
if dtype is None or dtype_is_floating(dtype) is True: if dtype is None or dtype_is_floating(dtype) is True:
return Xavier() return paddle.nn.initializer.XavierUniform()
else: else:
# For integer and boolean types, initialize with all zeros # For integer and boolean types, initialize with all zeros
return Constant() return paddle.nn.initializer.Constant()
# TODO (jiabin): reconstruct this in LayerObjHelper and avoid dependency of kwargs # TODO (jiabin): reconstruct this in LayerObjHelper and avoid dependency of kwargs
def is_instance(self, param_name, cls): def is_instance(self, param_name, cls):
......
...@@ -92,7 +92,7 @@ class ListenAndServ: ...@@ -92,7 +92,7 @@ class ListenAndServ:
shape=[32, 32], shape=[32, 32],
dtype='float32', dtype='float32',
name="X") name="X")
fluid.initializer.Constant(value=1.0)(x, main.global_block()) paddle.nn.initializer.Constant(value=1.0)(x, main.global_block())
paddle.scale(x=x, scale=10.0, out=out_var) paddle.scale(x=x, scale=10.0, out=out_var)
exe = fluid.Executor(place) exe = fluid.Executor(place)
......
...@@ -22,7 +22,6 @@ import numpy as np ...@@ -22,7 +22,6 @@ import numpy as np
import paddle import paddle
from ..layer_helper import LayerHelper from ..layer_helper import LayerHelper
from ..initializer import Normal, Constant
from ..framework import ( from ..framework import (
Variable, Variable,
OpProtoHolder, OpProtoHolder,
...@@ -240,7 +239,7 @@ def embedding( ...@@ -240,7 +239,7 @@ def embedding(
w_param_attrs = fluid.ParamAttr( w_param_attrs = fluid.ParamAttr(
name="emb_weight", name="emb_weight",
learning_rate=0.5, learning_rate=0.5,
initializer=fluid.initializer.NumpyArrayInitializer(weight_data), initializer=paddle.nn.initializer.Assign(weight_data),
trainable=True) trainable=True)
emb_2 = fluid.layers.embedding(input=data, size=(128, 100), param_attr=w_param_attrs, dtype='float32') emb_2 = fluid.layers.embedding(input=data, size=(128, 100), param_attr=w_param_attrs, dtype='float32')
""" """
...@@ -673,7 +672,10 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1): ...@@ -673,7 +672,10 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1):
) )
if is_new_var: if is_new_var:
helper.set_variable_initializer( helper.set_variable_initializer(
counter, initializer=Constant(value=begin - 1, force_cpu=True) counter,
initializer=paddle.nn.initializer.ConstantInitializer(
value=begin - 1, force_cpu=True
),
) )
helper.main_program.global_block()._prepend_op( helper.main_program.global_block()._prepend_op(
type='increment', type='increment',
......
...@@ -19,7 +19,6 @@ import numpy as np ...@@ -19,7 +19,6 @@ import numpy as np
import copy import copy
from .layer_helper import LayerHelper from .layer_helper import LayerHelper
from .initializer import Constant
from . import unique_name from . import unique_name
from .framework import Program, Variable, program_guard from .framework import Program, Variable, program_guard
from . import layers from . import layers
......
...@@ -39,7 +39,6 @@ from .backward import ( ...@@ -39,7 +39,6 @@ from .backward import (
_get_no_grad_set_name, _get_no_grad_set_name,
) )
from .framework import program_guard from .framework import program_guard
from .initializer import Constant
from .layer_helper import LayerHelper from .layer_helper import LayerHelper
from .dygraph import base as imperative_base from .dygraph import base as imperative_base
from .dygraph import no_grad from .dygraph import no_grad
...@@ -397,7 +396,8 @@ class Optimizer: ...@@ -397,7 +396,8 @@ class Optimizer:
lr_value = float(self._learning_rate()) lr_value = float(self._learning_rate())
self.helper.set_variable_initializer( self.helper.set_variable_initializer(
lr_var, initializer=Constant(value=lr_value) lr_var,
initializer=paddle.nn.initializer.Constant(value=lr_value),
) )
return return
...@@ -713,7 +713,10 @@ class Optimizer: ...@@ -713,7 +713,10 @@ class Optimizer:
device = self._get_device_for_param(param.name) device = self._get_device_for_param(param.name)
with device_guard(device): with device_guard(device):
self.helper.set_variable_initializer( self.helper.set_variable_initializer(
var, initializer=Constant(value=float(fill_value)) var,
initializer=paddle.nn.initializer.Constant(
value=float(fill_value)
),
) )
if in_dygraph_mode(): if in_dygraph_mode():
...@@ -774,7 +777,10 @@ class Optimizer: ...@@ -774,7 +777,10 @@ class Optimizer:
device = 'cpu' device = 'cpu'
with device_guard(device): with device_guard(device):
self.helper.set_variable_initializer( self.helper.set_variable_initializer(
var, initializer=Constant(value=float(fill_value)) var,
initializer=paddle.nn.initializer.Constant(
value=float(fill_value)
),
) )
if in_dygraph_mode(): if in_dygraph_mode():
...@@ -1225,10 +1231,12 @@ class Optimizer: ...@@ -1225,10 +1231,12 @@ class Optimizer:
# NOTE(zhiqiu): the initializer should be set after coalesce_tensor op, # NOTE(zhiqiu): the initializer should be set after coalesce_tensor op,
# so the shape of flatten_param and flatten_grad will be inferred. # so the shape of flatten_param and flatten_grad will be inferred.
self.helper.set_variable_initializer( self.helper.set_variable_initializer(
flatten_param, initializer=Constant(0.0) flatten_param,
initializer=paddle.nn.initializer.Constant(0.0),
) )
self.helper.set_variable_initializer( self.helper.set_variable_initializer(
flatten_grad, initializer=Constant(0.0) flatten_grad,
initializer=paddle.nn.initializer.Constant(0.0),
) )
return [(flatten_param, flatten_grad)] return [(flatten_param, flatten_grad)]
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from .initializer import Initializer, Xavier, Constant import paddle
from .regularizer import WeightDecayRegularizer from .regularizer import WeightDecayRegularizer
from paddle.fluid.data_feeder import check_type from paddle.fluid.data_feeder import check_type
...@@ -88,7 +88,10 @@ class ParamAttr: ...@@ -88,7 +88,10 @@ class ParamAttr:
check_type(do_model_average, "do_model_average", (bool), "ParamAttr") check_type(do_model_average, "do_model_average", (bool), "ParamAttr")
check_type(need_clip, "need_clip", (bool), "ParamAttr") check_type(need_clip, "need_clip", (bool), "ParamAttr")
check_type( check_type(
initializer, "initializer", (Initializer, type(None)), "ParamAttr" initializer,
"initializer",
(paddle.nn.initializer.Initializer, type(None)),
"ParamAttr",
) )
check_type( check_type(
regularizer, regularizer,
...@@ -139,7 +142,7 @@ class ParamAttr: ...@@ -139,7 +142,7 @@ class ParamAttr:
Returns: Returns:
None. None.
""" """
self._set_default_initializer(Xavier()) self._set_default_initializer(paddle.nn.initializer.XavierUniform())
def _set_default_bias_initializer(self): def _set_default_bias_initializer(self):
""" """
...@@ -151,7 +154,7 @@ class ParamAttr: ...@@ -151,7 +154,7 @@ class ParamAttr:
Returns: Returns:
None. None.
""" """
self._set_default_initializer(Constant(0.0)) self._set_default_initializer(paddle.nn.initializer.Constant(0.0))
@staticmethod @staticmethod
def _to_attr(arg): def _to_attr(arg):
...@@ -177,7 +180,7 @@ class ParamAttr: ...@@ -177,7 +180,7 @@ class ParamAttr:
return arg return arg
elif isinstance(arg, str): elif isinstance(arg, str):
return ParamAttr(name=arg) return ParamAttr(name=arg)
elif isinstance(arg, Initializer): elif isinstance(arg, paddle.nn.initializer.Initializer):
return ParamAttr(initializer=arg) return ParamAttr(initializer=arg)
elif isinstance(arg, WeightDecayRegularizer): elif isinstance(arg, WeightDecayRegularizer):
return ParamAttr(regularizer=arg) return ParamAttr(regularizer=arg)
......
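A minimal usage sketch of the updated ParamAttr contract after this hunk, assuming the public paddle.nn.initializer classes (the layer and names are illustrative, not taken from this diff):

    import paddle

    # ParamAttr now validates `initializer` against paddle.nn.initializer.Initializer,
    # so the public initializers can be passed directly.
    attr = paddle.ParamAttr(
        name="fc_weight",
        initializer=paddle.nn.initializer.XavierUniform(),
    )
    layer = paddle.nn.Linear(16, 8, weight_attr=attr)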
...@@ -36,7 +36,6 @@ from paddle.distributed.auto_parallel.utils import ( ...@@ -36,7 +36,6 @@ from paddle.distributed.auto_parallel.utils import (
save_distributed_checkpoint, save_distributed_checkpoint,
) )
from paddle.distributed.fleet import auto from paddle.distributed.fleet import auto
from paddle.fluid.initializer import NumpyArrayInitializer
paddle.enable_static() paddle.enable_static()
_global_parallel_strategy = None _global_parallel_strategy = None
...@@ -55,8 +54,12 @@ class MLPLayer(nn.Layer): ...@@ -55,8 +54,12 @@ class MLPLayer(nn.Layer):
np.random.seed(2021) np.random.seed(2021)
arr0 = np.random.normal(0, 0.02, size=(d_model, dim_feedforward)) arr0 = np.random.normal(0, 0.02, size=(d_model, dim_feedforward))
arr1 = np.random.normal(0, 0.02, size=(d_model, dim_feedforward)) arr1 = np.random.normal(0, 0.02, size=(d_model, dim_feedforward))
weight_attr0 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr0)) weight_attr0 = paddle.ParamAttr(
weight_attr1 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr1)) initializer=paddle.nn.initializer.Assign(arr0)
)
weight_attr1 = paddle.ParamAttr(
initializer=paddle.nn.initializer.Assign(arr1)
)
bias_attr = None bias_attr = None
self.linear0 = nn.Linear( self.linear0 = nn.Linear(
d_model, dim_feedforward, weight_attr0, bias_attr=bias_attr d_model, dim_feedforward, weight_attr0, bias_attr=bias_attr
......
...@@ -30,7 +30,6 @@ from paddle.distributed.auto_parallel.utils import ( ...@@ -30,7 +30,6 @@ from paddle.distributed.auto_parallel.utils import (
save_distributed_checkpoint, save_distributed_checkpoint,
) )
from paddle.distributed.fleet import auto from paddle.distributed.fleet import auto
from paddle.fluid.initializer import NumpyArrayInitializer
paddle.enable_static() paddle.enable_static()
_global_parallel_strategy = None _global_parallel_strategy = None
...@@ -48,7 +47,9 @@ class MLPLayer(nn.Layer): ...@@ -48,7 +47,9 @@ class MLPLayer(nn.Layer):
dim_feedforward = intermediate_size dim_feedforward = intermediate_size
np.random.seed(2021) np.random.seed(2021)
arr = np.random.normal(0, 0.02, size=(d_model, dim_feedforward)) arr = np.random.normal(0, 0.02, size=(d_model, dim_feedforward))
weight_attr = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr)) weight_attr = paddle.ParamAttr(
initializer=paddle.nn.initializer.Assign(arr)
)
bias_attr = None bias_attr = None
self.linear0 = nn.Linear( self.linear0 = nn.Linear(
......
...@@ -38,15 +38,11 @@ class TestColumnParallelLinearAPI(TestCollectiveAPIRunnerBase): ...@@ -38,15 +38,11 @@ class TestColumnParallelLinearAPI(TestCollectiveAPIRunnerBase):
paddle.distributed.broadcast(data, src=0) paddle.distributed.broadcast(data, src=0)
if rank == 0: if rank == 0:
param_attr = paddle.fluid.ParamAttr( param_attr = paddle.fluid.ParamAttr(
initializer=paddle.fluid.initializer.NumpyArrayInitializer( initializer=paddle.nn.initializer.Assign(np_array[:, 0:8]),
np_array[:, 0:8]
),
) )
else: else:
param_attr = paddle.fluid.ParamAttr( param_attr = paddle.fluid.ParamAttr(
initializer=paddle.fluid.initializer.NumpyArrayInitializer( initializer=paddle.nn.initializer.Assign(np_array[:, 8:16]),
np_array[:, 8:16]
),
) )
linear_out = paddle.distributed.split( linear_out = paddle.distributed.split(
......
...@@ -242,10 +242,10 @@ class PrePostProcessLayer(Layer): ...@@ -242,10 +242,10 @@ class PrePostProcessLayer(Layer):
self._layer_norm = paddle.nn.LayerNorm( self._layer_norm = paddle.nn.LayerNorm(
normalized_shape=d_model, normalized_shape=d_model,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.0) initializer=paddle.nn.initializer.Constant(1.0)
), ),
bias_attr=fluid.ParamAttr( bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.0) initializer=paddle.nn.initializer.Constant(0.0)
), ),
) )
...@@ -513,7 +513,9 @@ class PrepareEncoderDecoderLayer(Layer): ...@@ -513,7 +513,9 @@ class PrepareEncoderDecoderLayer(Layer):
sparse=is_sparse, sparse=is_sparse,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
name=word_emb_param_name, name=word_emb_param_name,
initializer=fluid.initializer.Normal(0.0, src_emb_dim**-0.5), initializer=paddle.nn.initializer.Normal(
0.0, src_emb_dim**-0.5
),
), ),
) )
...@@ -527,7 +529,7 @@ class PrepareEncoderDecoderLayer(Layer): ...@@ -527,7 +529,7 @@ class PrepareEncoderDecoderLayer(Layer):
sparse=is_sparse, sparse=is_sparse,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
name=pos_enc_param_name, name=pos_enc_param_name,
initializer=fluid.initializer.NumpyArrayInitializer(pos_inp), initializer=paddle.nn.initializer.Assign(pos_inp),
trainable=False, trainable=False,
), ),
) )
......
...@@ -39,7 +39,7 @@ def cnn_model(data): ...@@ -39,7 +39,7 @@ def cnn_model(data):
pool_stride=2, pool_stride=2,
act="relu", act="relu",
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
conv_pool_2 = fluid.nets.simple_img_conv_pool( conv_pool_2 = fluid.nets.simple_img_conv_pool(
...@@ -50,7 +50,7 @@ def cnn_model(data): ...@@ -50,7 +50,7 @@ def cnn_model(data):
pool_stride=2, pool_stride=2,
act="relu", act="relu",
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
...@@ -65,7 +65,7 @@ def cnn_model(data): ...@@ -65,7 +65,7 @@ def cnn_model(data):
size=SIZE, size=SIZE,
activation="softmax", activation="softmax",
weight_attr=fluid.param_attr.ParamAttr( weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
# To cover @RENAMED@GRADIENT # To cover @RENAMED@GRADIENT
...@@ -74,7 +74,7 @@ def cnn_model(data): ...@@ -74,7 +74,7 @@ def cnn_model(data):
size=SIZE, size=SIZE,
activation="softmax", activation="softmax",
weight_attr=fluid.param_attr.ParamAttr( weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
predict += predict2 predict += predict2
......
...@@ -39,7 +39,7 @@ def cnn_model(data): ...@@ -39,7 +39,7 @@ def cnn_model(data):
pool_stride=2, pool_stride=2,
act="relu", act="relu",
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
conv_pool_2 = fluid.nets.simple_img_conv_pool( conv_pool_2 = fluid.nets.simple_img_conv_pool(
...@@ -50,7 +50,7 @@ def cnn_model(data): ...@@ -50,7 +50,7 @@ def cnn_model(data):
pool_stride=2, pool_stride=2,
act="relu", act="relu",
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
...@@ -65,7 +65,7 @@ def cnn_model(data): ...@@ -65,7 +65,7 @@ def cnn_model(data):
size=SIZE, size=SIZE,
activation="softmax", activation="softmax",
weight_attr=fluid.param_attr.ParamAttr( weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
# To cover @RENAMED@GRADIENT # To cover @RENAMED@GRADIENT
...@@ -74,7 +74,7 @@ def cnn_model(data): ...@@ -74,7 +74,7 @@ def cnn_model(data):
size=SIZE, size=SIZE,
activation="softmax", activation="softmax",
weight_attr=fluid.param_attr.ParamAttr( weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
predict += predict2 predict += predict2
......
...@@ -39,7 +39,7 @@ def cnn_model(data): ...@@ -39,7 +39,7 @@ def cnn_model(data):
pool_stride=2, pool_stride=2,
act="relu", act="relu",
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
conv_pool_2 = fluid.nets.simple_img_conv_pool( conv_pool_2 = fluid.nets.simple_img_conv_pool(
...@@ -50,7 +50,7 @@ def cnn_model(data): ...@@ -50,7 +50,7 @@ def cnn_model(data):
pool_stride=2, pool_stride=2,
act="relu", act="relu",
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
...@@ -64,7 +64,7 @@ def cnn_model(data): ...@@ -64,7 +64,7 @@ def cnn_model(data):
size=SIZE, size=SIZE,
activation="softmax", activation="softmax",
weight_attr=fluid.param_attr.ParamAttr( weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
return predict return predict
......
...@@ -33,11 +33,9 @@ OUT_SIZE = 2 * MODEL_PARALLEL_SIZE ...@@ -33,11 +33,9 @@ OUT_SIZE = 2 * MODEL_PARALLEL_SIZE
def get_param_attr(weight, bias): def get_param_attr(weight, bias):
weight_attr = paddle.ParamAttr( weight_attr = paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(weight) initializer=paddle.nn.initializer.Assign(weight)
)
bias_attr = paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(bias)
) )
bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(bias))
return weight_attr, bias_attr return weight_attr, bias_attr
......
...@@ -33,11 +33,9 @@ OUT_SIZE = 2 * MODEL_PARALLEL_SIZE ...@@ -33,11 +33,9 @@ OUT_SIZE = 2 * MODEL_PARALLEL_SIZE
def get_param_attr(weight, bias): def get_param_attr(weight, bias):
weight_attr = paddle.ParamAttr( weight_attr = paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(weight) initializer=paddle.nn.initializer.Assign(weight)
)
bias_attr = paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(bias)
) )
bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(bias))
return weight_attr, bias_attr return weight_attr, bias_attr
...@@ -65,7 +63,7 @@ def create_model(data, rank): ...@@ -65,7 +63,7 @@ def create_model(data, rank):
data, data,
size=OUT_SIZE, size=OUT_SIZE,
weight_attr=paddle.ParamAttr( weight_attr=paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(np_weight) initializer=paddle.nn.initializer.Assign(np_weight)
), ),
bias_attr=bias_attr, bias_attr=bias_attr,
) )
......
...@@ -44,9 +44,7 @@ def create_model(data, rank): ...@@ -44,9 +44,7 @@ def create_model(data, rank):
axis=0, axis=0,
num_partitions=MODEL_PARALLEL_SIZE, num_partitions=MODEL_PARALLEL_SIZE,
weight_attr=paddle.ParamAttr( weight_attr=paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer( initializer=paddle.nn.initializer.Assign(np_weight_part)
np_weight_part
)
), ),
bias_attr=False, bias_attr=False,
) )
...@@ -55,7 +53,7 @@ def create_model(data, rank): ...@@ -55,7 +53,7 @@ def create_model(data, rank):
data, data,
size=OUT_SIZE, size=OUT_SIZE,
weight_attr=paddle.ParamAttr( weight_attr=paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(np_weight) initializer=paddle.nn.initializer.Assign(np_weight)
), ),
bias_attr=False, bias_attr=False,
) )
......
...@@ -35,7 +35,7 @@ def weight_init(mp, shape, col=True, seed=1024): ...@@ -35,7 +35,7 @@ def weight_init(mp, shape, col=True, seed=1024):
else: else:
step = shape[0] // mp.nranks step = shape[0] // mp.nranks
_w = w[mp.rank * step : mp.rank * step + step, :] _w = w[mp.rank * step : mp.rank * step + step, :]
return paddle.fluid.initializer.NumpyArrayInitializer(_w) return paddle.nn.initializer.Assign(_w)
class Criterion(nn.Layer): class Criterion(nn.Layer):
......
...@@ -35,7 +35,7 @@ def weight_init(mp, shape, col=True, seed=1024): ...@@ -35,7 +35,7 @@ def weight_init(mp, shape, col=True, seed=1024):
else: else:
step = shape[0] // mp.nranks step = shape[0] // mp.nranks
_w = w[mp.rank * step : mp.rank * step + step, :] _w = w[mp.rank * step : mp.rank * step + step, :]
return paddle.fluid.initializer.NumpyArrayInitializer(_w) return paddle.nn.initializer.Assign(_w)
class Criterion(nn.Layer): class Criterion(nn.Layer):
......
...@@ -36,7 +36,7 @@ def weight_init(mp, shape, col=True, seed=1024): ...@@ -36,7 +36,7 @@ def weight_init(mp, shape, col=True, seed=1024):
else: else:
step = shape[0] // mp.nranks step = shape[0] // mp.nranks
_w = w[mp.rank * step : mp.rank * step + step, :] _w = w[mp.rank * step : mp.rank * step + step, :]
return paddle.fluid.initializer.NumpyArrayInitializer(_w) return paddle.nn.initializer.Assign(_w)
class Criterion(nn.Layer): class Criterion(nn.Layer):
......
...@@ -42,13 +42,13 @@ class TestParallelEmbeddingAPI(TestCollectiveAPIRunnerBase): ...@@ -42,13 +42,13 @@ class TestParallelEmbeddingAPI(TestCollectiveAPIRunnerBase):
per_part_size = size[0] // 2 per_part_size = size[0] // 2
if rank == 0: if rank == 0:
param_attr = paddle.fluid.ParamAttr( param_attr = paddle.fluid.ParamAttr(
initializer=paddle.fluid.initializer.NumpyArrayInitializer( initializer=paddle.nn.initializer.Assign(
np_array[0:per_part_size, :] np_array[0:per_part_size, :]
), ),
) )
else: else:
param_attr = paddle.fluid.ParamAttr( param_attr = paddle.fluid.ParamAttr(
initializer=paddle.fluid.initializer.NumpyArrayInitializer( initializer=paddle.nn.initializer.Assign(
np_array[per_part_size : size[0], :] np_array[per_part_size : size[0], :]
), ),
) )
......
...@@ -39,13 +39,13 @@ class TestRowParallelLinearAPI(TestCollectiveAPIRunnerBase): ...@@ -39,13 +39,13 @@ class TestRowParallelLinearAPI(TestCollectiveAPIRunnerBase):
data = paddle.split(data, 2, axis=1)[rank] data = paddle.split(data, 2, axis=1)[rank]
if rank == 0: if rank == 0:
param_attr = paddle.fluid.ParamAttr( param_attr = paddle.fluid.ParamAttr(
initializer=paddle.fluid.initializer.NumpyArrayInitializer( initializer=paddle.nn.initializer.Assign(
np_array[0:500, :] np_array[0:500, :]
), ),
) )
else: else:
param_attr = paddle.fluid.ParamAttr( param_attr = paddle.fluid.ParamAttr(
initializer=paddle.fluid.initializer.NumpyArrayInitializer( initializer=paddle.nn.initializer.Assign(
np_array[500:1000, :] np_array[500:1000, :]
), ),
) )
......
...@@ -38,7 +38,7 @@ def cnn_model(data): ...@@ -38,7 +38,7 @@ def cnn_model(data):
pool_stride=2, pool_stride=2,
act="relu", act="relu",
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
conv_pool_2 = fluid.nets.simple_img_conv_pool( conv_pool_2 = fluid.nets.simple_img_conv_pool(
...@@ -49,7 +49,7 @@ def cnn_model(data): ...@@ -49,7 +49,7 @@ def cnn_model(data):
pool_stride=2, pool_stride=2,
act="relu", act="relu",
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
...@@ -63,7 +63,7 @@ def cnn_model(data): ...@@ -63,7 +63,7 @@ def cnn_model(data):
size=SIZE, size=SIZE,
activation="softmax", activation="softmax",
weight_attr=fluid.param_attr.ParamAttr( weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
return predict return predict
......
...@@ -60,7 +60,7 @@ class TestDistCTR2x2(TestDistRunnerBase): ...@@ -60,7 +60,7 @@ class TestDistCTR2x2(TestDistRunnerBase):
size=[dnn_input_dim, dnn_layer_dims[0]], size=[dnn_input_dim, dnn_layer_dims[0]],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name="deep_embedding", name="deep_embedding",
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
), ),
is_sparse=IS_SPARSE, is_sparse=IS_SPARSE,
) )
...@@ -74,7 +74,7 @@ class TestDistCTR2x2(TestDistRunnerBase): ...@@ -74,7 +74,7 @@ class TestDistCTR2x2(TestDistRunnerBase):
size=dim, size=dim,
activation="relu", activation="relu",
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
name='dnn-fc-%d' % i, name='dnn-fc-%d' % i,
) )
...@@ -87,7 +87,7 @@ class TestDistCTR2x2(TestDistRunnerBase): ...@@ -87,7 +87,7 @@ class TestDistCTR2x2(TestDistRunnerBase):
size=[lr_input_dim, 1], size=[lr_input_dim, 1],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name="wide_embedding", name="wide_embedding",
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
), ),
is_sparse=IS_SPARSE, is_sparse=IS_SPARSE,
) )
......
...@@ -107,7 +107,7 @@ class TestDistCTR2x2(FleetDistRunnerBase): ...@@ -107,7 +107,7 @@ class TestDistCTR2x2(FleetDistRunnerBase):
size=[dnn_input_dim, dnn_layer_dims[0]], size=[dnn_input_dim, dnn_layer_dims[0]],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name="deep_embedding", name="deep_embedding",
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
), ),
is_sparse=True, is_sparse=True,
padding_idx=0, padding_idx=0,
...@@ -122,7 +122,7 @@ class TestDistCTR2x2(FleetDistRunnerBase): ...@@ -122,7 +122,7 @@ class TestDistCTR2x2(FleetDistRunnerBase):
size=dim, size=dim,
activation="relu", activation="relu",
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
name='dnn-fc-%d' % i, name='dnn-fc-%d' % i,
) )
...@@ -135,7 +135,7 @@ class TestDistCTR2x2(FleetDistRunnerBase): ...@@ -135,7 +135,7 @@ class TestDistCTR2x2(FleetDistRunnerBase):
size=[lr_input_dim, 1], size=[lr_input_dim, 1],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name="wide_embedding", name="wide_embedding",
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
), ),
is_sparse=True, is_sparse=True,
padding_idx=0, padding_idx=0,
......
...@@ -78,7 +78,7 @@ class TestHeterPipelinePsCTR2x2(FleetDistHeterRunnerBase): ...@@ -78,7 +78,7 @@ class TestHeterPipelinePsCTR2x2(FleetDistHeterRunnerBase):
size=[dnn_input_dim, dnn_layer_dims[0]], size=[dnn_input_dim, dnn_layer_dims[0]],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name="deep_embedding", name="deep_embedding",
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
), ),
is_sparse=True, is_sparse=True,
) )
...@@ -94,7 +94,7 @@ class TestHeterPipelinePsCTR2x2(FleetDistHeterRunnerBase): ...@@ -94,7 +94,7 @@ class TestHeterPipelinePsCTR2x2(FleetDistHeterRunnerBase):
size=[lr_input_dim, 1], size=[lr_input_dim, 1],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name="wide_embedding", name="wide_embedding",
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
), ),
is_sparse=True, is_sparse=True,
) )
...@@ -109,7 +109,7 @@ class TestHeterPipelinePsCTR2x2(FleetDistHeterRunnerBase): ...@@ -109,7 +109,7 @@ class TestHeterPipelinePsCTR2x2(FleetDistHeterRunnerBase):
size=dim, size=dim,
activation="relu", activation="relu",
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
name='dnn-fc-%d' % i, name='dnn-fc-%d' % i,
) )
......
...@@ -40,7 +40,7 @@ def cnn_model(data): ...@@ -40,7 +40,7 @@ def cnn_model(data):
pool_stride=2, pool_stride=2,
act="relu", act="relu",
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
conv_pool_2 = fluid.nets.simple_img_conv_pool( conv_pool_2 = fluid.nets.simple_img_conv_pool(
...@@ -51,7 +51,7 @@ def cnn_model(data): ...@@ -51,7 +51,7 @@ def cnn_model(data):
pool_stride=2, pool_stride=2,
act="relu", act="relu",
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
...@@ -65,7 +65,7 @@ def cnn_model(data): ...@@ -65,7 +65,7 @@ def cnn_model(data):
size=SIZE, size=SIZE,
activation="softmax", activation="softmax",
weight_attr=fluid.param_attr.ParamAttr( weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
return predict return predict
......
...@@ -40,7 +40,7 @@ def cnn_model(data): ...@@ -40,7 +40,7 @@ def cnn_model(data):
pool_stride=2, pool_stride=2,
act="relu", act="relu",
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
conv_pool_2 = fluid.nets.simple_img_conv_pool( conv_pool_2 = fluid.nets.simple_img_conv_pool(
...@@ -51,7 +51,7 @@ def cnn_model(data): ...@@ -51,7 +51,7 @@ def cnn_model(data):
pool_stride=2, pool_stride=2,
act="relu", act="relu",
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
...@@ -65,7 +65,7 @@ def cnn_model(data): ...@@ -65,7 +65,7 @@ def cnn_model(data):
size=SIZE, size=SIZE,
activation="softmax", activation="softmax",
weight_attr=fluid.param_attr.ParamAttr( weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
return predict return predict
......
...@@ -124,7 +124,8 @@ def train_network( ...@@ -124,7 +124,8 @@ def train_network(
is_distributed=is_distributed, is_distributed=is_distributed,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), name="__emb__" initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
), ),
is_sparse=is_sparse, is_sparse=is_sparse,
) )
...@@ -137,7 +138,7 @@ def train_network( ...@@ -137,7 +138,7 @@ def train_network(
x=q_ss, x=q_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__q_fc__", name="__q_fc__",
learning_rate=base_lr, learning_rate=base_lr,
), ),
...@@ -149,7 +150,7 @@ def train_network( ...@@ -149,7 +150,7 @@ def train_network(
is_distributed=is_distributed, is_distributed=is_distributed,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__", name="__emb__",
learning_rate=emb_lr, learning_rate=emb_lr,
), ),
...@@ -164,7 +165,8 @@ def train_network( ...@@ -164,7 +165,8 @@ def train_network(
x=pt_ss, x=pt_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), name="__fc__" initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__",
), ),
bias_attr=fluid.ParamAttr(name="__fc_b__"), bias_attr=fluid.ParamAttr(name="__fc_b__"),
) )
...@@ -175,7 +177,8 @@ def train_network( ...@@ -175,7 +177,8 @@ def train_network(
is_distributed=is_distributed, is_distributed=is_distributed,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), name="__emb__" initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__",
), ),
is_sparse=is_sparse, is_sparse=is_sparse,
) )
...@@ -188,7 +191,8 @@ def train_network( ...@@ -188,7 +191,8 @@ def train_network(
x=nt_ss, x=nt_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), name="__fc__" initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__",
), ),
bias_attr=fluid.ParamAttr(name="__fc_b__"), bias_attr=fluid.ParamAttr(name="__fc_b__"),
) )
......
...@@ -86,11 +86,11 @@ class TestDistCTR2x2(FleetDistRunnerBase): ...@@ -86,11 +86,11 @@ class TestDistCTR2x2(FleetDistRunnerBase):
inference = bool(int(os.getenv("INFERENCE", "0"))) inference = bool(int(os.getenv("INFERENCE", "0")))
if initializer == 0: if initializer == 0:
init = fluid.initializer.Constant(value=0.01) init = paddle.nn.initializer.Constant(value=0.01)
elif initializer == 1: elif initializer == 1:
init = fluid.initializer.Uniform() init = paddle.nn.initializer.Uniform()
elif initializer == 2: elif initializer == 2:
init = fluid.initializer.Normal() init = paddle.nn.initializer.Normal()
else: else:
raise ValueError("error initializer code: {}".format(initializer)) raise ValueError("error initializer code: {}".format(initializer))
...@@ -113,7 +113,7 @@ class TestDistCTR2x2(FleetDistRunnerBase): ...@@ -113,7 +113,7 @@ class TestDistCTR2x2(FleetDistRunnerBase):
size=dim, size=dim,
activation="relu", activation="relu",
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
name='dnn-fc-%d' % i, name='dnn-fc-%d' % i,
) )
...@@ -127,7 +127,7 @@ class TestDistCTR2x2(FleetDistRunnerBase): ...@@ -127,7 +127,7 @@ class TestDistCTR2x2(FleetDistRunnerBase):
entry=entry, entry=entry,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name="wide_embedding", name="wide_embedding",
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
), ),
) )
......
...@@ -39,7 +39,7 @@ def cnn_model(data): ...@@ -39,7 +39,7 @@ def cnn_model(data):
pool_stride=2, pool_stride=2,
act="relu", act="relu",
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
conv_pool_2 = fluid.nets.simple_img_conv_pool( conv_pool_2 = fluid.nets.simple_img_conv_pool(
...@@ -50,7 +50,7 @@ def cnn_model(data): ...@@ -50,7 +50,7 @@ def cnn_model(data):
pool_stride=2, pool_stride=2,
act="relu", act="relu",
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
...@@ -64,7 +64,7 @@ def cnn_model(data): ...@@ -64,7 +64,7 @@ def cnn_model(data):
size=SIZE, size=SIZE,
activation="softmax", activation="softmax",
weight_attr=fluid.param_attr.ParamAttr( weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
return predict return predict
......
...@@ -121,7 +121,7 @@ class SE_ResNeXt: ...@@ -121,7 +121,7 @@ class SE_ResNeXt:
size=class_dim, size=class_dim,
activation='softmax', activation='softmax',
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.05) initializer=paddle.nn.initializer.Constant(value=0.05)
), ),
) )
return out return out
...@@ -174,7 +174,7 @@ class SE_ResNeXt: ...@@ -174,7 +174,7 @@ class SE_ResNeXt:
act=None, act=None,
# avoid pserver CPU init differs from GPU # avoid pserver CPU init differs from GPU
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.05) initializer=paddle.nn.initializer.Constant(value=0.05)
), ),
bias_attr=False, bias_attr=False,
) )
...@@ -187,7 +187,7 @@ class SE_ResNeXt: ...@@ -187,7 +187,7 @@ class SE_ResNeXt:
x=pool, x=pool,
size=num_channels // reduction_ratio, size=num_channels // reduction_ratio,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.05) initializer=paddle.nn.initializer.Constant(value=0.05)
), ),
activation='relu', activation='relu',
) )
...@@ -196,7 +196,7 @@ class SE_ResNeXt: ...@@ -196,7 +196,7 @@ class SE_ResNeXt:
x=squeeze, x=squeeze,
size=num_channels, size=num_channels,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.05) initializer=paddle.nn.initializer.Constant(value=0.05)
), ),
activation='sigmoid', activation='sigmoid',
) )
......
...@@ -59,7 +59,7 @@ def conv_net( ...@@ -59,7 +59,7 @@ def conv_net(
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
is_sparse=False, is_sparse=False,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
...@@ -70,7 +70,7 @@ def conv_net( ...@@ -70,7 +70,7 @@ def conv_net(
act="tanh", act="tanh",
pool_type="max", pool_type="max",
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
...@@ -78,7 +78,7 @@ def conv_net( ...@@ -78,7 +78,7 @@ def conv_net(
x=[conv_3], x=[conv_3],
size=fc0_dim, size=fc0_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
...@@ -87,7 +87,7 @@ def conv_net( ...@@ -87,7 +87,7 @@ def conv_net(
size=class_dim, size=class_dim,
activation="softmax", activation="softmax",
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
) )
......
...@@ -29,7 +29,9 @@ import paddle.fluid as fluid ...@@ -29,7 +29,9 @@ import paddle.fluid as fluid
import paddle.fluid.layers as layers import paddle.fluid.layers as layers
import paddle.nn.functional as F import paddle.nn.functional as F
const_para_attr = fluid.ParamAttr(initializer=fluid.initializer.Constant(0.001)) const_para_attr = fluid.ParamAttr(
initializer=paddle.nn.initializer.Constant(0.001)
)
const_bias_attr = const_para_attr const_bias_attr = const_para_attr
# Fix seed for test # Fix seed for test
...@@ -1253,8 +1255,8 @@ def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.0): ...@@ -1253,8 +1255,8 @@ def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.0):
out = layers.layer_norm( out = layers.layer_norm(
out, out,
begin_norm_axis=len(out.shape) - 1, begin_norm_axis=len(out.shape) - 1,
param_attr=fluid.initializer.Constant(1.0), param_attr=paddle.nn.initializer.Constant(1.0),
bias_attr=fluid.initializer.Constant(0.0), bias_attr=paddle.nn.initializer.Constant(0.0),
) )
elif cmd == "d": # add dropout elif cmd == "d": # add dropout
if dropout_rate: if dropout_rate:
...@@ -1292,7 +1294,7 @@ def prepare_encoder( ...@@ -1292,7 +1294,7 @@ def prepare_encoder(
size=[src_vocab_size, src_emb_dim], size=[src_vocab_size, src_emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name=word_emb_param_name, name=word_emb_param_name,
initializer=fluid.initializer.ConstantInitializer(0.001), initializer=paddle.nn.initializer.Constant(0.001),
), ),
) )
else: else:
...@@ -1301,7 +1303,9 @@ def prepare_encoder( ...@@ -1301,7 +1303,9 @@ def prepare_encoder(
size=[src_vocab_size, src_emb_dim], size=[src_vocab_size, src_emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name=word_emb_param_name, name=word_emb_param_name,
initializer=fluid.initializer.Normal(0.0, src_emb_dim**-0.5), initializer=paddle.nn.initializer.Normal(
0.0, src_emb_dim**-0.5
),
), ),
) )
...@@ -1312,7 +1316,7 @@ def prepare_encoder( ...@@ -1312,7 +1316,7 @@ def prepare_encoder(
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name=pos_enc_param_name, name=pos_enc_param_name,
trainable=False, trainable=False,
initializer=fluid.initializer.ConstantInitializer(0.001), initializer=paddle.nn.initializer.Constant(0.001),
), ),
) )
src_pos_enc.stop_gradient = True src_pos_enc.stop_gradient = True
......
...@@ -41,7 +41,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase): ...@@ -41,7 +41,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase):
is_sparse=IS_SPARSE, is_sparse=IS_SPARSE,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name='shared_w', name='shared_w',
initializer=fluid.initializer.Constant(value=0.1), initializer=paddle.nn.initializer.Constant(value=0.1),
), ),
) )
embed_second = fluid.layers.embedding( embed_second = fluid.layers.embedding(
...@@ -51,7 +51,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase): ...@@ -51,7 +51,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase):
is_sparse=IS_SPARSE, is_sparse=IS_SPARSE,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name='shared_w', name='shared_w',
initializer=fluid.initializer.Constant(value=0.1), initializer=paddle.nn.initializer.Constant(value=0.1),
), ),
) )
embed_third = fluid.layers.embedding( embed_third = fluid.layers.embedding(
...@@ -61,7 +61,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase): ...@@ -61,7 +61,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase):
is_sparse=IS_SPARSE, is_sparse=IS_SPARSE,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name='shared_w', name='shared_w',
initializer=fluid.initializer.Constant(value=0.1), initializer=paddle.nn.initializer.Constant(value=0.1),
), ),
) )
embed_forth = fluid.layers.embedding( embed_forth = fluid.layers.embedding(
...@@ -71,7 +71,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase): ...@@ -71,7 +71,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase):
is_sparse=IS_SPARSE, is_sparse=IS_SPARSE,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name='shared_w', name='shared_w',
initializer=fluid.initializer.Constant(value=0.1), initializer=paddle.nn.initializer.Constant(value=0.1),
), ),
) )
...@@ -84,7 +84,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase): ...@@ -84,7 +84,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase):
size=HIDDEN_SIZE, size=HIDDEN_SIZE,
activation='sigmoid', activation='sigmoid',
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1) initializer=paddle.nn.initializer.Constant(value=0.1)
), ),
) )
predict_word = paddle.static.nn.fc( predict_word = paddle.static.nn.fc(
...@@ -92,7 +92,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase): ...@@ -92,7 +92,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase):
size=dict_size, size=dict_size,
activation='softmax', activation='softmax',
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1) initializer=paddle.nn.initializer.Constant(value=0.1)
), ),
) )
cost = paddle.nn.functional.cross_entropy( cost = paddle.nn.functional.cross_entropy(
......
...@@ -26,7 +26,6 @@ import paddle.nn.functional as F ...@@ -26,7 +26,6 @@ import paddle.nn.functional as F
import paddle.static as static import paddle.static as static
import paddle.utils as utils import paddle.utils as utils
from paddle.distributed.fleet import auto from paddle.distributed.fleet import auto
from paddle.fluid.initializer import NumpyArrayInitializer
logging.getLogger().setLevel(logging.INFO) logging.getLogger().setLevel(logging.INFO)
paddle.enable_static() paddle.enable_static()
...@@ -42,8 +41,12 @@ class MLPLayer(nn.Layer): ...@@ -42,8 +41,12 @@ class MLPLayer(nn.Layer):
np.random.seed(2021) np.random.seed(2021)
arr0 = np.random.normal(0, 0.02, size=(d_model, dim_feedforward)) arr0 = np.random.normal(0, 0.02, size=(d_model, dim_feedforward))
arr1 = np.random.normal(0, 0.02, size=(dim_feedforward, d_model)) arr1 = np.random.normal(0, 0.02, size=(dim_feedforward, d_model))
weight_attr0 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr0)) weight_attr0 = paddle.ParamAttr(
weight_attr1 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr1)) initializer=paddle.nn.initializer.Assign(arr0)
)
weight_attr1 = paddle.ParamAttr(
initializer=paddle.nn.initializer.Assign(arr1)
)
bias_attr = None bias_attr = None
self.linear0 = nn.Linear( self.linear0 = nn.Linear(
d_model, dim_feedforward, weight_attr0, bias_attr=bias_attr d_model, dim_feedforward, weight_attr0, bias_attr=bias_attr
......
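The NumpyArrayInitializer replacement used in these auto-parallel tests maps directly onto paddle.nn.initializer.Assign, which accepts a numpy array (as well as a Python list or a Tensor). A minimal sketch, assuming a small random weight matrix of illustrative size:

    import numpy as np
    import paddle

    arr = np.random.normal(0, 0.02, size=(8, 8)).astype('float32')
    # old (removed): fluid.initializer.NumpyArrayInitializer(arr)
    weight_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(arr))
    linear = paddle.nn.Linear(8, 8, weight_attr=weight_attr)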
...@@ -203,8 +203,8 @@ class BertModelLayer(Layer): ...@@ -203,8 +203,8 @@ class BertModelLayer(Layer):
self._sent_emb_name = "sent_embedding" self._sent_emb_name = "sent_embedding"
self._dtype = "float16" if use_fp16 else "float32" self._dtype = "float16" if use_fp16 else "float32"
self._param_initializer = fluid.initializer.TruncatedNormal( self._param_initializer = paddle.nn.initializer.TruncatedNormal(
scale=config['initializer_range'] std=config['initializer_range']
) )
paddle.set_default_dtype(self._dtype) paddle.set_default_dtype(self._dtype)
self._src_emb = paddle.nn.Embedding( self._src_emb = paddle.nn.Embedding(
...@@ -317,8 +317,8 @@ class PretrainModelLayer(Layer): ...@@ -317,8 +317,8 @@ class PretrainModelLayer(Layer):
self._prepostprocess_dropout = config['hidden_dropout_prob'] self._prepostprocess_dropout = config['hidden_dropout_prob']
self._word_emb_name = "word_embedding" self._word_emb_name = "word_embedding"
self._param_initializer = fluid.initializer.TruncatedNormal( self._param_initializer = paddle.nn.initializer.TruncatedNormal(
scale=config['initializer_range'] std=config['initializer_range']
) )
self._weight_sharing = weight_sharing self._weight_sharing = weight_sharing
self.use_fp16 = use_fp16 self.use_fp16 = use_fp16
...@@ -343,7 +343,7 @@ class PretrainModelLayer(Layer): ...@@ -343,7 +343,7 @@ class PretrainModelLayer(Layer):
self.mask_lm_out_bias_attr = fluid.ParamAttr( self.mask_lm_out_bias_attr = fluid.ParamAttr(
name="mask_lm_out_fc.b_0", name="mask_lm_out_fc.b_0",
initializer=fluid.initializer.Constant(value=0.0), initializer=paddle.nn.initializer.Constant(value=0.0),
) )
if not self._weight_sharing: if not self._weight_sharing:
......
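The TruncatedNormal rename above also swaps the old fluid keyword scale for std (and loc for mean) on the public initializer. A minimal sketch, assuming an initializer_range of 0.02:

    import paddle

    # old (removed): fluid.initializer.TruncatedNormal(scale=0.02)
    init = paddle.nn.initializer.TruncatedNormal(mean=0.0, std=0.02)
    param_attr = paddle.ParamAttr(initializer=init)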
...@@ -41,7 +41,7 @@ class ConvBNLayer(fluid.dygraph.Layer): ...@@ -41,7 +41,7 @@ class ConvBNLayer(fluid.dygraph.Layer):
padding=padding, padding=padding,
groups=groups, groups=groups,
weight_attr=ParamAttr( weight_attr=ParamAttr(
initializer=fluid.initializer.Normal(0.0, 0.02) initializer=paddle.nn.initializer.Normal(0.0, 0.02)
), ),
bias_attr=False, bias_attr=False,
) )
...@@ -49,11 +49,11 @@ class ConvBNLayer(fluid.dygraph.Layer): ...@@ -49,11 +49,11 @@ class ConvBNLayer(fluid.dygraph.Layer):
num_channels=ch_out, num_channels=ch_out,
is_test=is_test, is_test=is_test,
param_attr=ParamAttr( param_attr=ParamAttr(
initializer=fluid.initializer.Normal(0.0, 0.02), initializer=paddle.nn.initializer.Normal(0.0, 0.02),
regularizer=L2Decay(0.0), regularizer=L2Decay(0.0),
), ),
bias_attr=ParamAttr( bias_attr=ParamAttr(
initializer=fluid.initializer.Constant(0.0), initializer=paddle.nn.initializer.Constant(0.0),
regularizer=L2Decay(0.0), regularizer=L2Decay(0.0),
), ),
) )
......
...@@ -26,10 +26,8 @@ from paddle.nn import Embedding ...@@ -26,10 +26,8 @@ from paddle.nn import Embedding
INF = 1.0 * 1e5 INF = 1.0 * 1e5
alpha = 0.6 alpha = 0.6
uniform_initializer = lambda x: fluid.initializer.UniformInitializer( uniform_initializer = lambda x: paddle.nn.initializer.Uniform(low=-x, high=x)
low=-x, high=x zero_constant = paddle.nn.initializer.Constant(0.0)
)
zero_constant = fluid.initializer.Constant(0.0)
class BasicLSTMUnit(Layer): class BasicLSTMUnit(Layer):
......
...@@ -17,9 +17,9 @@ from functools import reduce ...@@ -17,9 +17,9 @@ from functools import reduce
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.param_attr as attr import paddle.fluid.param_attr as attr
from paddle.common_ops_import import Variable
from paddle.fluid.dygraph import Layer from paddle.fluid.dygraph import Layer
from paddle.jit.api import to_static from paddle.jit.api import to_static
from paddle.static import Variable
class EmbeddingLayer: class EmbeddingLayer:
...@@ -48,7 +48,8 @@ class EmbeddingLayer: ...@@ -48,7 +48,8 @@ class EmbeddingLayer:
sparse=True, sparse=True,
padding_idx=self.padding_idx, padding_idx=self.padding_idx,
weight_attr=attr.ParamAttr( weight_attr=attr.ParamAttr(
name=self.name, initializer=fluid.initializer.Xavier() name=self.name,
initializer=paddle.nn.initializer.XavierUniform(),
), ),
) )
......
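The Xavier replacement above follows the uniform/normal split of the public API: fluid.initializer.Xavier() (uniform by default) becomes paddle.nn.initializer.XavierUniform(), and Xavier(uniform=False) becomes XavierNormal(), as in the inference-pass tests later in this diff. A minimal sketch of both variants (the learning_rate value is illustrative):

    import paddle

    # old default fluid.initializer.Xavier() -> uniform variant
    xavier_uniform = paddle.nn.initializer.XavierUniform()
    # old fluid.initializer.Xavier(uniform=False) -> normal variant
    xavier_normal = paddle.nn.initializer.XavierNormal()
    attr = paddle.ParamAttr(initializer=xavier_uniform, learning_rate=0.001)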
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
from functools import reduce from functools import reduce
import paddle import paddle
from paddle.static import Variable from paddle.common_ops_import import Variable
class EmbeddingLayer: class EmbeddingLayer:
......
...@@ -118,10 +118,10 @@ def dyfunc_BilinearTensorProduct(layer1, layer2): ...@@ -118,10 +118,10 @@ def dyfunc_BilinearTensorProduct(layer1, layer2):
4, 4,
1000, 1000,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.99) initializer=paddle.nn.initializer.Constant(value=0.99)
), ),
bias_attr=fluid.ParamAttr( bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.5) initializer=paddle.nn.initializer.Constant(value=0.5)
), ),
) )
...@@ -138,10 +138,10 @@ def dyfunc_Conv2D(input): ...@@ -138,10 +138,10 @@ def dyfunc_Conv2D(input):
out_channels=2, out_channels=2,
kernel_size=3, kernel_size=3,
weight_attr=paddle.ParamAttr( weight_attr=paddle.ParamAttr(
initializer=fluid.initializer.Constant(value=0.99) initializer=paddle.nn.initializer.Constant(value=0.99)
), ),
bias_attr=paddle.ParamAttr( bias_attr=paddle.ParamAttr(
initializer=fluid.initializer.Constant(value=0.5) initializer=paddle.nn.initializer.Constant(value=0.5)
), ),
) )
res = conv2d(input) res = conv2d(input)
...@@ -170,10 +170,10 @@ def dyfunc_Conv2DTranspose(input): ...@@ -170,10 +170,10 @@ def dyfunc_Conv2DTranspose(input):
12, 12,
12, 12,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.99) initializer=paddle.nn.initializer.Constant(value=0.99)
), ),
bias_attr=fluid.ParamAttr( bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.5) initializer=paddle.nn.initializer.Constant(value=0.5)
), ),
) )
ret = conv2dTranspose(input) ret = conv2dTranspose(input)
...@@ -222,7 +222,7 @@ def dyfunc_Pool2D(input): ...@@ -222,7 +222,7 @@ def dyfunc_Pool2D(input):
def dyfunc_Prelu(input): def dyfunc_Prelu(input):
prelu0 = paddle.nn.PReLU( prelu0 = paddle.nn.PReLU(
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.0) initializer=paddle.nn.initializer.Constant(1.0)
), ),
) )
res = prelu0(input) res = prelu0(input)
......
...@@ -115,11 +115,11 @@ class Conv1D(fluid.dygraph.Layer): ...@@ -115,11 +115,11 @@ class Conv1D(fluid.dygraph.Layer):
k = 1.0 / math.sqrt(fan_in) k = 1.0 / math.sqrt(fan_in)
param_attr = ParamAttr( param_attr = ParamAttr(
name=prefix + "_w", name=prefix + "_w",
initializer=fluid.initializer.Uniform(low=-k, high=k), initializer=paddle.nn.initializer.Uniform(low=-k, high=k),
) )
bias_attr = ParamAttr( bias_attr = ParamAttr(
name=prefix + "_b", name=prefix + "_b",
initializer=fluid.initializer.Uniform(low=-k, high=k), initializer=paddle.nn.initializer.Uniform(low=-k, high=k),
) )
self._conv2d = paddle.nn.Conv2D( self._conv2d = paddle.nn.Conv2D(
......
...@@ -125,10 +125,10 @@ class MyConvLayer(fluid.dygraph.Layer): ...@@ -125,10 +125,10 @@ class MyConvLayer(fluid.dygraph.Layer):
out_channels=2, out_channels=2,
kernel_size=3, kernel_size=3,
weight_attr=paddle.ParamAttr( weight_attr=paddle.ParamAttr(
initializer=fluid.initializer.Constant(value=0.99) initializer=paddle.nn.initializer.Constant(value=0.99)
), ),
bias_attr=paddle.ParamAttr( bias_attr=paddle.ParamAttr(
initializer=fluid.initializer.Constant(value=0.5) initializer=paddle.nn.initializer.Constant(value=0.5)
), ),
) )
......
...@@ -352,7 +352,7 @@ class conv2d(fluid.dygraph.Layer): ...@@ -352,7 +352,7 @@ class conv2d(fluid.dygraph.Layer):
con_bias_attr = False con_bias_attr = False
else: else:
con_bias_attr = fluid.ParamAttr( con_bias_attr = fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.0) initializer=paddle.nn.initializer.Constant(0.0)
) )
self.conv = paddle.nn.Conv2D( self.conv = paddle.nn.Conv2D(
...@@ -362,9 +362,7 @@ class conv2d(fluid.dygraph.Layer): ...@@ -362,9 +362,7 @@ class conv2d(fluid.dygraph.Layer):
stride=stride, stride=stride,
padding=padding, padding=padding,
weight_attr=paddle.ParamAttr( weight_attr=paddle.ParamAttr(
initializer=fluid.initializer.NormalInitializer( initializer=paddle.nn.initializer.Normal(mean=0.0, std=stddev)
loc=0.0, scale=stddev
)
), ),
bias_attr=con_bias_attr, bias_attr=con_bias_attr,
) )
...@@ -378,10 +376,10 @@ class conv2d(fluid.dygraph.Layer): ...@@ -378,10 +376,10 @@ class conv2d(fluid.dygraph.Layer):
use_global_stats=True, # set True to use deterministic algorithm use_global_stats=True, # set True to use deterministic algorithm
num_channels=num_filters, num_channels=num_filters,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.NormalInitializer(1.0, 0.02) initializer=paddle.nn.initializer.Normal(1.0, 0.02)
), ),
bias_attr=fluid.ParamAttr( bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.0) initializer=paddle.nn.initializer.Constant(0.0)
), ),
trainable_statistics=True, trainable_statistics=True,
) )
...@@ -421,7 +419,7 @@ class DeConv2D(fluid.dygraph.Layer): ...@@ -421,7 +419,7 @@ class DeConv2D(fluid.dygraph.Layer):
de_bias_attr = False de_bias_attr = False
else: else:
de_bias_attr = fluid.ParamAttr( de_bias_attr = fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.0) initializer=paddle.nn.initializer.Constant(0.0)
) )
self._deconv = paddle.nn.Conv2DTranspose( self._deconv = paddle.nn.Conv2DTranspose(
...@@ -431,9 +429,7 @@ class DeConv2D(fluid.dygraph.Layer): ...@@ -431,9 +429,7 @@ class DeConv2D(fluid.dygraph.Layer):
stride=stride, stride=stride,
padding=padding, padding=padding,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.NormalInitializer( initializer=paddle.nn.initializer.Normal(mean=0.0, std=stddev)
loc=0.0, scale=stddev
)
), ),
bias_attr=de_bias_attr, bias_attr=de_bias_attr,
) )
...@@ -444,10 +440,10 @@ class DeConv2D(fluid.dygraph.Layer): ...@@ -444,10 +440,10 @@ class DeConv2D(fluid.dygraph.Layer):
use_global_stats=True, # set True to use deterministic algorithm use_global_stats=True, # set True to use deterministic algorithm
num_channels=num_filters, num_channels=num_filters,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.NormalInitializer(1.0, 0.02) initializer=paddle.nn.initializer.Normal(1.0, 0.02)
), ),
bias_attr=fluid.ParamAttr( bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.0) initializer=paddle.nn.initializer.Constant(0.0)
), ),
trainable_statistics=True, trainable_statistics=True,
) )
......
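For the Normal replacements in this file, the old NormalInitializer(loc=..., scale=...) keywords become mean/std on paddle.nn.initializer.Normal; positional arguments keep the same order. A minimal sketch, assuming a stddev of 0.02:

    import paddle

    # old (removed): fluid.initializer.NormalInitializer(loc=0.0, scale=0.02)
    normal = paddle.nn.initializer.Normal(mean=0.0, std=0.02)
    weight_attr = paddle.ParamAttr(initializer=normal)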
...@@ -98,7 +98,7 @@ class BiGRU(fluid.dygraph.Layer): ...@@ -98,7 +98,7 @@ class BiGRU(fluid.dygraph.Layer):
in_features=input_dim, in_features=input_dim,
out_features=grnn_hidden_dim * 3, out_features=grnn_hidden_dim * 3,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform( initializer=paddle.nn.initializer.Uniform(
low=-init_bound, high=init_bound low=-init_bound, high=init_bound
), ),
regularizer=fluid.regularizer.L2DecayRegularizer( regularizer=fluid.regularizer.L2DecayRegularizer(
...@@ -111,7 +111,7 @@ class BiGRU(fluid.dygraph.Layer): ...@@ -111,7 +111,7 @@ class BiGRU(fluid.dygraph.Layer):
size=grnn_hidden_dim, size=grnn_hidden_dim,
h_0=h_0, h_0=h_0,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform( initializer=paddle.nn.initializer.Uniform(
low=-init_bound, high=init_bound low=-init_bound, high=init_bound
), ),
regularizer=fluid.regularizer.L2DecayRegularizer( regularizer=fluid.regularizer.L2DecayRegularizer(
...@@ -124,7 +124,7 @@ class BiGRU(fluid.dygraph.Layer): ...@@ -124,7 +124,7 @@ class BiGRU(fluid.dygraph.Layer):
in_features=input_dim, in_features=input_dim,
out_features=grnn_hidden_dim * 3, out_features=grnn_hidden_dim * 3,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform( initializer=paddle.nn.initializer.Uniform(
low=-init_bound, high=init_bound low=-init_bound, high=init_bound
), ),
regularizer=fluid.regularizer.L2DecayRegularizer( regularizer=fluid.regularizer.L2DecayRegularizer(
...@@ -138,7 +138,7 @@ class BiGRU(fluid.dygraph.Layer): ...@@ -138,7 +138,7 @@ class BiGRU(fluid.dygraph.Layer):
is_reverse=True, is_reverse=True,
h_0=h_0, h_0=h_0,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform( initializer=paddle.nn.initializer.Uniform(
low=-init_bound, high=init_bound low=-init_bound, high=init_bound
), ),
regularizer=fluid.regularizer.L2DecayRegularizer( regularizer=fluid.regularizer.L2DecayRegularizer(
...@@ -375,7 +375,7 @@ class LexNet(fluid.dygraph.Layer): ...@@ -375,7 +375,7 @@ class LexNet(fluid.dygraph.Layer):
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
learning_rate=self.emb_lr, learning_rate=self.emb_lr,
name="word_emb", name="word_emb",
initializer=fluid.initializer.Uniform( initializer=paddle.nn.initializer.Uniform(
low=-self.init_bound, high=self.init_bound low=-self.init_bound, high=self.init_bound
), ),
), ),
...@@ -415,7 +415,7 @@ class LexNet(fluid.dygraph.Layer): ...@@ -415,7 +415,7 @@ class LexNet(fluid.dygraph.Layer):
in_features=self.grnn_hidden_dim * 2, in_features=self.grnn_hidden_dim * 2,
out_features=self.num_labels, out_features=self.num_labels,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform( initializer=paddle.nn.initializer.Uniform(
low=-self.init_bound, high=self.init_bound low=-self.init_bound, high=self.init_bound
), ),
regularizer=fluid.regularizer.L2DecayRegularizer( regularizer=fluid.regularizer.L2DecayRegularizer(
......
...@@ -22,7 +22,6 @@ from predictor_utils import PredictorTools ...@@ -22,7 +22,6 @@ from predictor_utils import PredictorTools
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
from paddle.jit.api import to_static from paddle.jit.api import to_static
from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
...@@ -61,7 +60,8 @@ class ConvBNLayer(fluid.dygraph.Layer): ...@@ -61,7 +60,8 @@ class ConvBNLayer(fluid.dygraph.Layer):
padding=padding, padding=padding,
groups=num_groups, groups=num_groups,
weight_attr=ParamAttr( weight_attr=ParamAttr(
initializer=MSRA(), name=self.full_name() + "_weights" initializer=paddle.nn.initializer.KaimingUniform(),
name=self.full_name() + "_weights",
), ),
bias_attr=False, bias_attr=False,
) )
...@@ -259,7 +259,8 @@ class MobileNetV1(fluid.dygraph.Layer): ...@@ -259,7 +259,8 @@ class MobileNetV1(fluid.dygraph.Layer):
int(1024 * scale), int(1024 * scale),
class_dim, class_dim,
weight_attr=ParamAttr( weight_attr=ParamAttr(
initializer=MSRA(), name=self.full_name() + "fc7_weights" initializer=paddle.nn.initializer.KaimingUniform(),
name=self.full_name() + "fc7_weights",
), ),
bias_attr=ParamAttr(name="fc7_offset"), bias_attr=ParamAttr(name="fc7_offset"),
) )
......
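The MSRA replacement in this model relies on the old initializer defaulting to its uniform variant, so fluid.initializer.MSRA() becomes paddle.nn.initializer.KaimingUniform(); the normal variant would be KaimingNormal(). A minimal sketch:

    import paddle

    # old (removed): fluid.initializer.MSRA()
    kaiming = paddle.nn.initializer.KaimingUniform()
    conv_weight_attr = paddle.ParamAttr(initializer=kaiming)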
...@@ -49,26 +49,26 @@ class SimpleLSTMRNN(fluid.Layer): ...@@ -49,26 +49,26 @@ class SimpleLSTMRNN(fluid.Layer):
for i in range(self._num_layers): for i in range(self._num_layers):
weight_1 = self.create_parameter( weight_1 = self.create_parameter(
attr=fluid.ParamAttr( attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer( initializer=paddle.nn.initializer.Uniform(
low=-self._init_scale, high=self._init_scale low=-self._init_scale, high=self._init_scale
) )
), ),
shape=[self._hidden_size * 2, self._hidden_size * 4], shape=[self._hidden_size * 2, self._hidden_size * 4],
dtype="float32", dtype="float32",
default_initializer=fluid.initializer.UniformInitializer( default_initializer=paddle.nn.initializer.Uniform(
low=-self._init_scale, high=self._init_scale low=-self._init_scale, high=self._init_scale
), ),
) )
self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1)) self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1))
bias_1 = self.create_parameter( bias_1 = self.create_parameter(
attr=fluid.ParamAttr( attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer( initializer=paddle.nn.initializer.Uniform(
low=-self._init_scale, high=self._init_scale low=-self._init_scale, high=self._init_scale
) )
), ),
shape=[self._hidden_size * 4], shape=[self._hidden_size * 4],
dtype="float32", dtype="float32",
default_initializer=fluid.initializer.Constant(0.0), default_initializer=paddle.nn.initializer.Constant(0.0),
) )
self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1)) self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1))
...@@ -158,7 +158,7 @@ class PtbModel(fluid.Layer): ...@@ -158,7 +158,7 @@ class PtbModel(fluid.Layer):
sparse=False, sparse=False,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
name='embedding_para', name='embedding_para',
initializer=fluid.initializer.UniformInitializer( initializer=paddle.nn.initializer.Uniform(
low=-init_scale, high=init_scale low=-init_scale, high=init_scale
), ),
), ),
...@@ -167,7 +167,7 @@ class PtbModel(fluid.Layer): ...@@ -167,7 +167,7 @@ class PtbModel(fluid.Layer):
attr=fluid.ParamAttr(), attr=fluid.ParamAttr(),
shape=[self.hidden_size, self.vocab_size], shape=[self.hidden_size, self.vocab_size],
dtype="float32", dtype="float32",
default_initializer=fluid.initializer.UniformInitializer( default_initializer=paddle.nn.initializer.Uniform(
low=-self.init_scale, high=self.init_scale low=-self.init_scale, high=self.init_scale
), ),
) )
...@@ -175,7 +175,7 @@ class PtbModel(fluid.Layer): ...@@ -175,7 +175,7 @@ class PtbModel(fluid.Layer):
attr=fluid.ParamAttr(), attr=fluid.ParamAttr(),
shape=[self.vocab_size], shape=[self.vocab_size],
dtype="float32", dtype="float32",
default_initializer=fluid.initializer.UniformInitializer( default_initializer=paddle.nn.initializer.Uniform(
low=-self.init_scale, high=self.init_scale low=-self.init_scale, high=self.init_scale
), ),
) )
......
...@@ -194,7 +194,7 @@ class ResNet(fluid.dygraph.Layer): ...@@ -194,7 +194,7 @@ class ResNet(fluid.dygraph.Layer):
self.pool2d_avg_output, self.pool2d_avg_output,
class_dim, class_dim,
weight_attr=fluid.param_attr.ParamAttr( weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv) initializer=paddle.nn.initializer.Uniform(-stdv, stdv)
), ),
) )
......
...@@ -131,7 +131,7 @@ class SqueezeExcitation(fluid.dygraph.Layer): ...@@ -131,7 +131,7 @@ class SqueezeExcitation(fluid.dygraph.Layer):
num_channels, num_channels,
num_channels // reduction_ratio, num_channels // reduction_ratio,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv) initializer=paddle.nn.initializer.Uniform(-stdv, stdv)
), ),
) )
stdv = 1.0 / math.sqrt(num_channels / 16.0 * 1.0) stdv = 1.0 / math.sqrt(num_channels / 16.0 * 1.0)
...@@ -139,7 +139,7 @@ class SqueezeExcitation(fluid.dygraph.Layer): ...@@ -139,7 +139,7 @@ class SqueezeExcitation(fluid.dygraph.Layer):
num_channels // reduction_ratio, num_channels // reduction_ratio,
num_channels, num_channels,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv) initializer=paddle.nn.initializer.Uniform(-stdv, stdv)
), ),
) )
...@@ -316,7 +316,7 @@ class SeResNeXt(fluid.dygraph.Layer): ...@@ -316,7 +316,7 @@ class SeResNeXt(fluid.dygraph.Layer):
self.pool2d_avg_output, self.pool2d_avg_output,
class_dim, class_dim,
weight_attr=fluid.param_attr.ParamAttr( weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv) initializer=paddle.nn.initializer.Uniform(-stdv, stdv)
), ),
) )
......
...@@ -230,7 +230,7 @@ class SkipGram(fluid.dygraph.Layer): ...@@ -230,7 +230,7 @@ class SkipGram(fluid.dygraph.Layer):
self.embedding_size, self.embedding_size,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
name='embedding_para', name='embedding_para',
initializer=fluid.initializer.UniformInitializer( initializer=paddle.nn.initializer.Uniform(
low=-0.5 / self.embedding_size, low=-0.5 / self.embedding_size,
high=0.5 / self.embedding_size, high=0.5 / self.embedding_size,
), ),
...@@ -242,7 +242,7 @@ class SkipGram(fluid.dygraph.Layer): ...@@ -242,7 +242,7 @@ class SkipGram(fluid.dygraph.Layer):
self.embedding_size, self.embedding_size,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
name='embedding_out_para', name='embedding_out_para',
initializer=fluid.initializer.UniformInitializer( initializer=paddle.nn.initializer.Uniform(
low=-0.5 / self.embedding_size, low=-0.5 / self.embedding_size,
high=0.5 / self.embedding_size, high=0.5 / self.embedding_size,
), ),
......
...@@ -62,10 +62,10 @@ class PrePostProcessLayer(Layer): ...@@ -62,10 +62,10 @@ class PrePostProcessLayer(Layer):
paddle.nn.LayerNorm( paddle.nn.LayerNorm(
normalized_shape=d_model, normalized_shape=d_model,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.0) initializer=paddle.nn.initializer.Constant(1.0)
), ),
bias_attr=fluid.ParamAttr( bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.0) initializer=paddle.nn.initializer.Constant(0.0)
), ),
), ),
) )
...@@ -295,7 +295,7 @@ class Embedder(Layer): ...@@ -295,7 +295,7 @@ class Embedder(Layer):
vocab_size, vocab_size,
emb_dim, emb_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(0.0, emb_dim**-0.5) initializer=paddle.nn.initializer.Normal(0.0, emb_dim**-0.5)
), ),
) )
...@@ -330,7 +330,7 @@ class WrapEncoder(Layer): ...@@ -330,7 +330,7 @@ class WrapEncoder(Layer):
max_length, max_length,
self.emb_dim, self.emb_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer( initializer=paddle.nn.initializer.Assign(
position_encoding_init(max_length, self.emb_dim) position_encoding_init(max_length, self.emb_dim)
), ),
trainable=False, trainable=False,
...@@ -522,7 +522,7 @@ class WrapDecoder(Layer): ...@@ -522,7 +522,7 @@ class WrapDecoder(Layer):
max_length, max_length,
self.emb_dim, self.emb_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer( initializer=paddle.nn.initializer.Assign(
position_encoding_init(max_length, self.emb_dim) position_encoding_init(max_length, self.emb_dim)
), ),
trainable=False, trainable=False,
......
...@@ -253,10 +253,10 @@ class YOLOv3(fluid.dygraph.Layer): ...@@ -253,10 +253,10 @@ class YOLOv3(fluid.dygraph.Layer):
stride=1, stride=1,
padding=0, padding=0,
weight_attr=ParamAttr( weight_attr=ParamAttr(
initializer=fluid.initializer.Normal(0.0, 0.02) initializer=paddle.nn.initializer.Normal(0.0, 0.02)
), ),
bias_attr=ParamAttr( bias_attr=ParamAttr(
initializer=fluid.initializer.Constant(0.0), initializer=paddle.nn.initializer.Constant(0.0),
regularizer=L2Decay(0.0), regularizer=L2Decay(0.0),
), ),
), ),
......
...@@ -70,7 +70,7 @@ def net(batch_size=4, lr=0.01): ...@@ -70,7 +70,7 @@ def net(batch_size=4, lr=0.01):
size=[dnn_input_dim, dnn_layer_dims[0]], size=[dnn_input_dim, dnn_layer_dims[0]],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name="deep_embedding", name="deep_embedding",
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
), ),
is_sparse=True, is_sparse=True,
) )
...@@ -86,7 +86,7 @@ def net(batch_size=4, lr=0.01): ...@@ -86,7 +86,7 @@ def net(batch_size=4, lr=0.01):
size=[lr_input_dim, 1], size=[lr_input_dim, 1],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name="wide_embedding", name="wide_embedding",
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
), ),
is_sparse=True, is_sparse=True,
) )
...@@ -99,7 +99,7 @@ def net(batch_size=4, lr=0.01): ...@@ -99,7 +99,7 @@ def net(batch_size=4, lr=0.01):
size=dim, size=dim,
activation="relu", activation="relu",
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01) initializer=paddle.nn.initializer.Constant(value=0.01)
), ),
name='dnn-fc-%d' % i, name='dnn-fc-%d' % i,
) )
......
...@@ -27,7 +27,6 @@ from paddle.fluid.framework import ( ...@@ -27,7 +27,6 @@ from paddle.fluid.framework import (
Operator, Operator,
convert_np_dtype_to_dtype_, convert_np_dtype_to_dtype_,
) )
from paddle.fluid.initializer import NumpyArrayInitializer
from paddle.static.quantization import ( from paddle.static.quantization import (
QuantizationFreezePass, QuantizationFreezePass,
QuantizationTransformPass, QuantizationTransformPass,
...@@ -305,7 +304,7 @@ def create_fake_model(program_config): ...@@ -305,7 +304,7 @@ def create_fake_model(program_config):
shape=tensor_config.shape, shape=tensor_config.shape,
type=core.VarDesc.VarType.LOD_TENSOR, type=core.VarDesc.VarType.LOD_TENSOR,
name=name, name=name,
initializer=NumpyArrayInitializer(tensor_config.data), initializer=paddle.nn.initializer.Assign(tensor_config.data),
) )
in_vars = [] in_vars = []
for name in sorted(save_var_map.keys()): for name in sorted(save_var_map.keys()):
......
...@@ -30,7 +30,7 @@ class ConvBiasMkldnnFusePassSamePadTest(InferencePassTest): ...@@ -30,7 +30,7 @@ class ConvBiasMkldnnFusePassSamePadTest(InferencePassTest):
name="data", shape=[-1, 3, 100, 100], dtype="float32" name="data", shape=[-1, 3, 100, 100], dtype="float32"
) )
param_attr = fluid.ParamAttr( param_attr = fluid.ParamAttr(
initializer=fluid.initializer.Xavier(uniform=False), initializer=paddle.nn.initializer.XavierNormal(),
learning_rate=0.001, learning_rate=0.001,
) )
conv_out = paddle.static.nn.conv2d( conv_out = paddle.static.nn.conv2d(
...@@ -63,7 +63,7 @@ class ConvBiasMkldnnFusePassValidPadTest(ConvBiasMkldnnFusePassSamePadTest): ...@@ -63,7 +63,7 @@ class ConvBiasMkldnnFusePassValidPadTest(ConvBiasMkldnnFusePassSamePadTest):
name="data", shape=[-1, 3, 100, 100], dtype="float32" name="data", shape=[-1, 3, 100, 100], dtype="float32"
) )
param_attr = fluid.ParamAttr( param_attr = fluid.ParamAttr(
initializer=fluid.initializer.Xavier(uniform=False), initializer=paddle.nn.initializer.XavierNormal(),
learning_rate=0.001, learning_rate=0.001,
) )
conv_out = paddle.static.nn.conv2d( conv_out = paddle.static.nn.conv2d(
...@@ -89,7 +89,7 @@ class ConvBiasMkldnnFusePassExplictPadTest(ConvBiasMkldnnFusePassSamePadTest): ...@@ -89,7 +89,7 @@ class ConvBiasMkldnnFusePassExplictPadTest(ConvBiasMkldnnFusePassSamePadTest):
name="data", shape=[-1, 3, 100, 100], dtype="float32" name="data", shape=[-1, 3, 100, 100], dtype="float32"
) )
param_attr = fluid.ParamAttr( param_attr = fluid.ParamAttr(
initializer=fluid.initializer.Xavier(uniform=False), initializer=paddle.nn.initializer.XavierNormal(),
learning_rate=0.001, learning_rate=0.001,
) )
conv_out = paddle.static.nn.conv2d( conv_out = paddle.static.nn.conv2d(
...@@ -114,7 +114,7 @@ class ConvBiasMkldnnFusePassGroupTest(ConvBiasMkldnnFusePassSamePadTest): ...@@ -114,7 +114,7 @@ class ConvBiasMkldnnFusePassGroupTest(ConvBiasMkldnnFusePassSamePadTest):
name="data", shape=[-1, 3, 100, 100], dtype="float32" name="data", shape=[-1, 3, 100, 100], dtype="float32"
) )
param_attr = fluid.ParamAttr( param_attr = fluid.ParamAttr(
initializer=fluid.initializer.Xavier(uniform=False), initializer=paddle.nn.initializer.XavierNormal(),
learning_rate=0.001, learning_rate=0.001,
) )
conv_out = paddle.static.nn.conv2d( conv_out = paddle.static.nn.conv2d(
...@@ -145,7 +145,7 @@ class ConvBiasMkldnnFusePassDialtionsGroupsTest( ...@@ -145,7 +145,7 @@ class ConvBiasMkldnnFusePassDialtionsGroupsTest(
name="data", shape=[-1, 3, 100, 100], dtype="float32" name="data", shape=[-1, 3, 100, 100], dtype="float32"
) )
param_attr = fluid.ParamAttr( param_attr = fluid.ParamAttr(
initializer=fluid.initializer.Xavier(uniform=False), initializer=paddle.nn.initializer.XavierNormal(),
learning_rate=0.001, learning_rate=0.001,
) )
conv_out = paddle.static.nn.conv2d( conv_out = paddle.static.nn.conv2d(
...@@ -173,7 +173,7 @@ class ConvTransposeMkldnnFusePassDialtionsGroupsTest(InferencePassTest): ...@@ -173,7 +173,7 @@ class ConvTransposeMkldnnFusePassDialtionsGroupsTest(InferencePassTest):
with fluid.program_guard(self.main_program, self.startup_program): with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(name="data", shape=[-1, 3, 5, 5], dtype="float32") data = fluid.data(name="data", shape=[-1, 3, 5, 5], dtype="float32")
param_attr = fluid.ParamAttr( param_attr = fluid.ParamAttr(
initializer=fluid.initializer.Xavier(uniform=False), initializer=paddle.nn.initializer.XavierNormal(),
learning_rate=0.001, learning_rate=0.001,
) )
conv_out = paddle.static.nn.conv2d_transpose( conv_out = paddle.static.nn.conv2d_transpose(
......
...@@ -180,11 +180,11 @@ class TensorRTSubgraphPassInstanceNormTest(InferencePassTest): ...@@ -180,11 +180,11 @@ class TensorRTSubgraphPassInstanceNormTest(InferencePassTest):
) )
param_attr = fluid.ParamAttr( param_attr = fluid.ParamAttr(
name='instance_norm_w', name='instance_norm_w',
initializer=fluid.initializer.Constant(value=1.0), initializer=paddle.nn.initializer.Constant(value=1.0),
) )
bias_attr = fluid.ParamAttr( bias_attr = fluid.ParamAttr(
name='instance_norm_b', name='instance_norm_b',
initializer=fluid.initializer.Constant(value=0.0), initializer=paddle.nn.initializer.Constant(value=0.0),
) )
out = paddle.static.nn.instance_norm( out = paddle.static.nn.instance_norm(
input=data, param_attr=param_attr, bias_attr=bias_attr input=data, param_attr=param_attr, bias_attr=bias_attr
......
...@@ -114,11 +114,11 @@ class TestBatchNorm(unittest.TestCase): ...@@ -114,11 +114,11 @@ class TestBatchNorm(unittest.TestCase):
shape[1], shape[1],
is_test=is_test, is_test=is_test,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.0), initializer=paddle.nn.initializer.Constant(1.0),
trainable=False, trainable=False,
), ),
bias_attr=fluid.ParamAttr( bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.0), initializer=paddle.nn.initializer.Constant(0.0),
trainable=False, trainable=False,
), ),
trainable_statistics=trainable_statistics, trainable_statistics=trainable_statistics,
...@@ -262,7 +262,7 @@ class TestBatchNormUseGlobalStats(unittest.TestCase): ...@@ -262,7 +262,7 @@ class TestBatchNormUseGlobalStats(unittest.TestCase):
net1 = paddle.nn.BatchNorm( net1 = paddle.nn.BatchNorm(
6, 6,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.0) initializer=paddle.nn.initializer.Constant(1.0)
), ),
use_global_stats=self.use_global_stats, use_global_stats=self.use_global_stats,
trainable_statistics=self.trainable_statistics, trainable_statistics=self.trainable_statistics,
......
...@@ -322,12 +322,12 @@ class TestNetWithEpsilonTensor(unittest.TestCase): ...@@ -322,12 +322,12 @@ class TestNetWithEpsilonTensor(unittest.TestCase):
label_np = np.random.randint(2, size=(2, 1)).astype('int64') label_np = np.random.randint(2, size=(2, 1)).astype('int64')
weight_attr1 = paddle.ParamAttr( weight_attr1 = paddle.ParamAttr(
name="weight1", name="weight1",
initializer=fluid.initializer.Constant(value=1.0), initializer=paddle.nn.initializer.Constant(value=1.0),
trainable=True, trainable=True,
) )
weight_attr2 = paddle.ParamAttr( weight_attr2 = paddle.ParamAttr(
name="weight2", name="weight2",
initializer=fluid.initializer.Constant(value=2.0), initializer=paddle.nn.initializer.Constant(value=2.0),
trainable=True, trainable=True,
) )
clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0) clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
......
...@@ -298,7 +298,7 @@ class TestRunProgramOpWithFC(RunProgramNPUOpTest): ...@@ -298,7 +298,7 @@ class TestRunProgramOpWithFC(RunProgramNPUOpTest):
weight_attr = fluid.ParamAttr( weight_attr = fluid.ParamAttr(
name=self.input_names['Params'][0], name=self.input_names['Params'][0],
learning_rate=0.5, learning_rate=0.5,
initializer=fluid.initializer.NumpyArrayInitializer( initializer=paddle.nn.initializer.Assign(
self.inputs['Params'][self.input_names['Params'][0]] self.inputs['Params'][self.input_names['Params'][0]]
), ),
trainable=True, trainable=True,
...@@ -306,7 +306,7 @@ class TestRunProgramOpWithFC(RunProgramNPUOpTest): ...@@ -306,7 +306,7 @@ class TestRunProgramOpWithFC(RunProgramNPUOpTest):
bias_attr = fluid.ParamAttr( bias_attr = fluid.ParamAttr(
name=self.input_names['Params'][1], name=self.input_names['Params'][1],
learning_rate=0.5, learning_rate=0.5,
initializer=fluid.initializer.NumpyArrayInitializer( initializer=paddle.nn.initializer.Assign(
self.inputs['Params'][self.input_names['Params'][1]] self.inputs['Params'][self.input_names['Params'][1]]
), ),
trainable=True, trainable=True,
......
...@@ -41,7 +41,7 @@ class SimpleNet(fluid.Layer): ...@@ -41,7 +41,7 @@ class SimpleNet(fluid.Layer):
self.hidden_size, self.hidden_size,
sparse=is_sparse, sparse=is_sparse,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer( initializer=paddle.nn.initializer.Uniform(
low=-init_scale, high=init_scale low=-init_scale, high=init_scale
) )
), ),
...@@ -50,7 +50,7 @@ class SimpleNet(fluid.Layer): ...@@ -50,7 +50,7 @@ class SimpleNet(fluid.Layer):
attr=fluid.ParamAttr(), attr=fluid.ParamAttr(),
shape=[self.hidden_size, self.vocab_size], shape=[self.hidden_size, self.vocab_size],
dtype=dtype, dtype=dtype,
default_initializer=fluid.initializer.UniformInitializer( default_initializer=paddle.nn.initializer.Uniform(
low=-self.init_scale, high=self.init_scale low=-self.init_scale, high=self.init_scale
), ),
) )
...@@ -58,7 +58,7 @@ class SimpleNet(fluid.Layer): ...@@ -58,7 +58,7 @@ class SimpleNet(fluid.Layer):
attr=fluid.ParamAttr(), attr=fluid.ParamAttr(),
shape=[self.vocab_size], shape=[self.vocab_size],
dtype=dtype, dtype=dtype,
default_initializer=fluid.initializer.UniformInitializer( default_initializer=paddle.nn.initializer.Uniform(
low=-self.init_scale, high=self.init_scale low=-self.init_scale, high=self.init_scale
), ),
) )
......
...@@ -26,7 +26,7 @@ def simple_fc_net_with_inputs(img, label, class_num=10): ...@@ -26,7 +26,7 @@ def simple_fc_net_with_inputs(img, label, class_num=10):
size=100, size=100,
activation='relu', activation='relu',
bias_attr=fluid.ParamAttr( bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=1.0) initializer=paddle.nn.initializer.Constant(value=1.0)
), ),
) )
prediction = paddle.static.nn.fc( prediction = paddle.static.nn.fc(
...@@ -53,7 +53,7 @@ def batchnorm_fc_with_inputs(img, label, class_num=10): ...@@ -53,7 +53,7 @@ def batchnorm_fc_with_inputs(img, label, class_num=10):
size=200, size=200,
activation='relu', activation='relu',
bias_attr=fluid.ParamAttr( bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=1.0) initializer=paddle.nn.initializer.Constant(value=1.0)
), ),
) )
......
...@@ -25,11 +25,9 @@ paddle.enable_static() ...@@ -25,11 +25,9 @@ paddle.enable_static()
def get_param_attr(weight, bias): def get_param_attr(weight, bias):
weight_attr = paddle.ParamAttr( weight_attr = paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(weight) initializer=paddle.nn.initializer.Assign(weight)
)
bias_attr = paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(bias)
) )
bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(bias))
return weight_attr, bias_attr return weight_attr, bias_attr
......
...@@ -30,11 +30,9 @@ OUT_SIZE = 2 * MODEL_PARALLEL_SIZE ...@@ -30,11 +30,9 @@ OUT_SIZE = 2 * MODEL_PARALLEL_SIZE
def get_param_attr(weight, bias): def get_param_attr(weight, bias):
weight_attr = paddle.ParamAttr( weight_attr = paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(weight) initializer=paddle.nn.initializer.Assign(weight)
)
bias_attr = paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(bias)
) )
bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(bias))
return weight_attr, bias_attr return weight_attr, bias_attr
......
...@@ -25,11 +25,9 @@ paddle.enable_static() ...@@ -25,11 +25,9 @@ paddle.enable_static()
def get_param_attr(weight, bias): def get_param_attr(weight, bias):
weight_attr = paddle.ParamAttr( weight_attr = paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(weight) initializer=paddle.nn.initializer.Assign(weight)
)
bias_attr = paddle.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(bias)
) )
bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(bias))
return weight_attr, bias_attr return weight_attr, bias_attr
......
...@@ -767,12 +767,12 @@ class TestAdamOptimizer(unittest.TestCase): ...@@ -767,12 +767,12 @@ class TestAdamOptimizer(unittest.TestCase):
label_np = np.random.randint(2, size=(2, 1)).astype('int64') label_np = np.random.randint(2, size=(2, 1)).astype('int64')
weight_attr1 = paddle.ParamAttr( weight_attr1 = paddle.ParamAttr(
name="weight1", name="weight1",
initializer=fluid.initializer.Constant(value=1.0), initializer=paddle.nn.initializer.Constant(value=1.0),
trainable=True, trainable=True,
) )
weight_attr2 = paddle.ParamAttr( weight_attr2 = paddle.ParamAttr(
name="weight2", name="weight2",
initializer=fluid.initializer.Constant(value=2.0), initializer=paddle.nn.initializer.Constant(value=2.0),
trainable=True, trainable=True,
) )
clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0) clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
...@@ -926,7 +926,7 @@ class TestAdamOptimizer(unittest.TestCase): ...@@ -926,7 +926,7 @@ class TestAdamOptimizer(unittest.TestCase):
main = fluid.Program() main = fluid.Program()
weight_attr = paddle.ParamAttr( weight_attr = paddle.ParamAttr(
name="weight1", name="weight1",
initializer=fluid.initializer.Constant(value=1.0), initializer=paddle.nn.initializer.Constant(value=1.0),
regularizer=fluid.regularizer.L1DecayRegularizer( regularizer=fluid.regularizer.L1DecayRegularizer(
regularization_coeff=0.1 regularization_coeff=0.1
), ),
......
...@@ -39,7 +39,6 @@ from paddle.distributed.auto_parallel.partitioner import Partitioner ...@@ -39,7 +39,6 @@ from paddle.distributed.auto_parallel.partitioner import Partitioner
from paddle.distributed.auto_parallel.reshard import Resharder from paddle.distributed.auto_parallel.reshard import Resharder
from paddle.distributed.fleet import auto from paddle.distributed.fleet import auto
from paddle.fluid import core from paddle.fluid import core
from paddle.fluid.initializer import NumpyArrayInitializer
if os.getenv("CUDA_VISIBLE_DEVICES") is not None: if os.getenv("CUDA_VISIBLE_DEVICES") is not None:
os.environ["CUDA_VISIBLE_DEVICES"] = "" os.environ["CUDA_VISIBLE_DEVICES"] = ""
...@@ -373,10 +372,18 @@ class MLPLayer(nn.Layer): ...@@ -373,10 +372,18 @@ class MLPLayer(nn.Layer):
arr1 = np.random.normal(0, 0.02, size=(dim_feedforward, d_model)) arr1 = np.random.normal(0, 0.02, size=(dim_feedforward, d_model))
arr2 = np.random.normal(0, 0.02, size=(d_model, dim_feedforward)) arr2 = np.random.normal(0, 0.02, size=(d_model, dim_feedforward))
arr3 = np.random.normal(0, 0.02, size=(dim_feedforward, d_model)) arr3 = np.random.normal(0, 0.02, size=(dim_feedforward, d_model))
weight_attr0 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr0)) weight_attr0 = paddle.ParamAttr(
weight_attr1 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr1)) initializer=paddle.nn.initializer.Assign(arr0)
weight_attr2 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr2)) )
weight_attr3 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr3)) weight_attr1 = paddle.ParamAttr(
initializer=paddle.nn.initializer.Assign(arr1)
)
weight_attr2 = paddle.ParamAttr(
initializer=paddle.nn.initializer.Assign(arr2)
)
weight_attr3 = paddle.ParamAttr(
initializer=paddle.nn.initializer.Assign(arr3)
)
bias_attr = None bias_attr = None
self.linear0 = nn.Linear( self.linear0 = nn.Linear(
d_model, dim_feedforward, weight_attr0, bias_attr=bias_attr d_model, dim_feedforward, weight_attr0, bias_attr=bias_attr
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
import unittest import unittest
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
...@@ -22,7 +23,7 @@ class TestAvoidTwiceInitialization(unittest.TestCase): ...@@ -22,7 +23,7 @@ class TestAvoidTwiceInitialization(unittest.TestCase):
cur_program = fluid.Program() cur_program = fluid.Program()
cur_block = cur_program.current_block() cur_block = cur_program.current_block()
var = cur_block.create_parameter( var = cur_block.create_parameter(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
shape=[2, 2], shape=[2, 2],
dtype='float32', dtype='float32',
name='var_a', name='var_a',
...@@ -40,7 +41,7 @@ class TestAvoidTwiceInitialization(unittest.TestCase): ...@@ -40,7 +41,7 @@ class TestAvoidTwiceInitialization(unittest.TestCase):
attrs={'ring_id': 0}, attrs={'ring_id': 0},
) )
var2 = cur_block.create_parameter( var2 = cur_block.create_parameter(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
shape=[2, 2], shape=[2, 2],
dtype='float32', dtype='float32',
name='var_a', name='var_a',
......
...@@ -26,7 +26,7 @@ class L1(fluid.Layer): ...@@ -26,7 +26,7 @@ class L1(fluid.Layer):
def __init__(self): def __init__(self):
super().__init__() super().__init__()
self._param_attr = fluid.ParamAttr( self._param_attr = fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1) initializer=paddle.nn.initializer.Constant(value=0.1)
) )
self.w1 = self.create_parameter( self.w1 = self.create_parameter(
attr=self._param_attr, shape=[2, 2], dtype='float32', is_bias=False attr=self._param_attr, shape=[2, 2], dtype='float32', is_bias=False
......
...@@ -183,11 +183,11 @@ class TestBatchNorm(unittest.TestCase): ...@@ -183,11 +183,11 @@ class TestBatchNorm(unittest.TestCase):
shape[1], shape[1],
is_test=is_test, is_test=is_test,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.0), initializer=paddle.nn.initializer.Constant(1.0),
trainable=False, trainable=False,
), ),
bias_attr=fluid.ParamAttr( bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.0), initializer=paddle.nn.initializer.Constant(0.0),
trainable=False, trainable=False,
), ),
trainable_statistics=trainable_statistics, trainable_statistics=trainable_statistics,
...@@ -378,7 +378,7 @@ class TestBatchNormUseGlobalStats(unittest.TestCase): ...@@ -378,7 +378,7 @@ class TestBatchNormUseGlobalStats(unittest.TestCase):
net1 = paddle.nn.BatchNorm( net1 = paddle.nn.BatchNorm(
6, 6,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.0) initializer=paddle.nn.initializer.Constant(1.0)
), ),
use_global_stats=self.use_global_stats, use_global_stats=self.use_global_stats,
trainable_statistics=self.trainable_statistics, trainable_statistics=self.trainable_statistics,
......
...@@ -48,7 +48,7 @@ class TestDoubleGrad(unittest.TestCase): ...@@ -48,7 +48,7 @@ class TestDoubleGrad(unittest.TestCase):
name='x', name='x',
shape=[1], shape=[1],
dtype='float32', dtype='float32',
default_initializer=fluid.initializer.Constant(3), default_initializer=paddle.nn.initializer.Constant(3),
) )
(grad1,) = fluid.gradients(net(x), x) # 2x = 6 (grad1,) = fluid.gradients(net(x), x) # 2x = 6
z = net(x - grad1) z = net(x - grad1)
...@@ -69,7 +69,7 @@ class TestDoubleGrad(unittest.TestCase): ...@@ -69,7 +69,7 @@ class TestDoubleGrad(unittest.TestCase):
name='x', name='x',
shape=[1], shape=[1],
dtype='float32', dtype='float32',
default_initializer=fluid.initializer.Constant(1), default_initializer=paddle.nn.initializer.Constant(1),
) )
y = x * x y = x * x
(dx1,) = fluid.gradients(y, x) (dx1,) = fluid.gradients(y, x)
......
...@@ -41,7 +41,7 @@ class TestCommunicatorGeoEnd2End(unittest.TestCase): ...@@ -41,7 +41,7 @@ class TestCommunicatorGeoEnd2End(unittest.TestCase):
size=[10000, 10], size=[10000, 10],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name="embedding", name="embedding",
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
), ),
is_sparse=True, is_sparse=True,
) )
......
...@@ -18,7 +18,6 @@ import numpy as np ...@@ -18,7 +18,6 @@ import numpy as np
import paddle import paddle
import paddle.fluid.dygraph as dg import paddle.fluid.dygraph as dg
import paddle.fluid.initializer as I
import paddle.nn.functional as F import paddle.nn.functional as F
from paddle import fluid, nn from paddle import fluid, nn
...@@ -110,11 +109,11 @@ class Conv2DTestCase(unittest.TestCase): ...@@ -110,11 +109,11 @@ class Conv2DTestCase(unittest.TestCase):
else (-1, self.num_channels, -1, -1) else (-1, self.num_channels, -1, -1)
) )
x_var = fluid.data("input", input_shape, dtype=self.dtype) x_var = fluid.data("input", input_shape, dtype=self.dtype)
weight_attr = I.NumpyArrayInitializer(self.weight) weight_attr = paddle.nn.initializer.Assign(self.weight)
if self.bias is None: if self.bias is None:
bias_attr = False bias_attr = False
else: else:
bias_attr = I.NumpyArrayInitializer(self.bias) bias_attr = paddle.nn.initializer.Assign(self.bias)
if self.padding_mode != 'zeros': if self.padding_mode != 'zeros':
x_var = F.pad( x_var = F.pad(
x_var, x_var,
......
...@@ -18,7 +18,6 @@ import numpy as np ...@@ -18,7 +18,6 @@ import numpy as np
import paddle import paddle
import paddle.fluid.dygraph as dg import paddle.fluid.dygraph as dg
import paddle.fluid.initializer as I
import paddle.nn.functional as F import paddle.nn.functional as F
from paddle import fluid, nn from paddle import fluid, nn
...@@ -101,11 +100,11 @@ class Conv2DTransposeTestCase(unittest.TestCase): ...@@ -101,11 +100,11 @@ class Conv2DTransposeTestCase(unittest.TestCase):
else (-1, self.num_channels, -1, -1) else (-1, self.num_channels, -1, -1)
) )
x_var = fluid.data("input", input_shape, dtype=self.dtype) x_var = fluid.data("input", input_shape, dtype=self.dtype)
weight_attr = I.NumpyArrayInitializer(self.weight) weight_attr = paddle.nn.initializer.Assign(self.weight)
if self.bias is None: if self.bias is None:
bias_attr = False bias_attr = False
else: else:
bias_attr = I.NumpyArrayInitializer(self.bias) bias_attr = paddle.nn.initializer.Assign(self.bias)
y_var = paddle.static.nn.conv2d_transpose( y_var = paddle.static.nn.conv2d_transpose(
x_var, x_var,
......
...@@ -18,7 +18,6 @@ import numpy as np ...@@ -18,7 +18,6 @@ import numpy as np
import paddle import paddle
import paddle.fluid.dygraph as dg import paddle.fluid.dygraph as dg
import paddle.fluid.initializer as I
import paddle.nn.functional as F import paddle.nn.functional as F
from paddle import fluid, nn from paddle import fluid, nn
...@@ -97,11 +96,11 @@ class Conv3DTestCase(unittest.TestCase): ...@@ -97,11 +96,11 @@ class Conv3DTestCase(unittest.TestCase):
else (-1, self.num_channels, -1, -1, -1) else (-1, self.num_channels, -1, -1, -1)
) )
x_var = fluid.data("input", input_shape, dtype=self.dtype) x_var = fluid.data("input", input_shape, dtype=self.dtype)
weight_attr = I.NumpyArrayInitializer(self.weight) weight_attr = paddle.nn.initializer.Assign(self.weight)
if self.bias is None: if self.bias is None:
bias_attr = False bias_attr = False
else: else:
bias_attr = I.NumpyArrayInitializer(self.bias) bias_attr = paddle.nn.initializer.Assign(self.bias)
y_var = paddle.static.nn.conv3d( y_var = paddle.static.nn.conv3d(
x_var, x_var,
self.num_filters, self.num_filters,
......
...@@ -18,7 +18,6 @@ import numpy as np ...@@ -18,7 +18,6 @@ import numpy as np
import paddle import paddle
import paddle.fluid.dygraph as dg import paddle.fluid.dygraph as dg
import paddle.fluid.initializer as I
import paddle.nn.functional as F import paddle.nn.functional as F
from paddle import fluid, nn from paddle import fluid, nn
...@@ -99,11 +98,11 @@ class Conv3DTransposeTestCase(unittest.TestCase): ...@@ -99,11 +98,11 @@ class Conv3DTransposeTestCase(unittest.TestCase):
else (-1, self.num_channels, -1, -1, -1) else (-1, self.num_channels, -1, -1, -1)
) )
x_var = fluid.data("input", input_shape, dtype=self.dtype) x_var = fluid.data("input", input_shape, dtype=self.dtype)
weight_attr = I.NumpyArrayInitializer(self.weight) weight_attr = paddle.nn.initializer.Assign(self.weight)
if self.bias is None: if self.bias is None:
bias_attr = False bias_attr = False
else: else:
bias_attr = I.NumpyArrayInitializer(self.bias) bias_attr = paddle.nn.initializer.Assign(self.bias)
y_var = paddle.static.nn.conv3d_transpose( y_var = paddle.static.nn.conv3d_transpose(
x_var, x_var,
self.num_filters, self.num_filters,
......
...@@ -120,15 +120,15 @@ class TestGeneratorSeed(unittest.TestCase): ...@@ -120,15 +120,15 @@ class TestGeneratorSeed(unittest.TestCase):
result_1 = paddle.static.nn.fc( result_1 = paddle.static.nn.fc(
x, x,
size=10, size=10,
weight_attr=fluid.initializer.TruncatedNormal( weight_attr=paddle.nn.initializer.TruncatedNormal(
loc=0.0, scale=2.0 mean=0.0, std=2.0
), ),
) )
result_2 = paddle.static.nn.fc( result_2 = paddle.static.nn.fc(
x, x,
size=10, size=10,
weight_attr=fluid.initializer.TruncatedNormal( weight_attr=paddle.nn.initializer.TruncatedNormal(
loc=0.0, scale=2.0 mean=0.0, std=2.0
), ),
) )
......
...@@ -61,7 +61,7 @@ def simple_fc_net(places, use_legacy_py_reader, use_double_buffer): ...@@ -61,7 +61,7 @@ def simple_fc_net(places, use_legacy_py_reader, use_double_buffer):
size=hidden_size, size=hidden_size,
activation='tanh', activation='tanh',
bias_attr=fluid.ParamAttr( bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=1.0) initializer=paddle.nn.initializer.Constant(value=1.0)
), ),
) )
......
...@@ -58,9 +58,7 @@ def cnn_model(data): ...@@ -58,9 +58,7 @@ def cnn_model(data):
size=SIZE, size=SIZE,
activation="softmax", activation="softmax",
weight_attr=fluid.param_attr.ParamAttr( weight_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.NormalInitializer( initializer=paddle.nn.initializer.Normal(mean=0.0, std=scale)
loc=0.0, scale=scale
)
), ),
) )
return predict return predict
......
...@@ -120,7 +120,7 @@ class Test_Detach(unittest.TestCase): ...@@ -120,7 +120,7 @@ class Test_Detach(unittest.TestCase):
initializer=paddle.nn.initializer.Constant(5.0) initializer=paddle.nn.initializer.Constant(5.0)
) )
linear_b_param_attrs = fluid.ParamAttr( linear_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(6.0) initializer=paddle.nn.initializer.Constant(6.0)
) )
linear = Linear( linear = Linear(
4, 4,
...@@ -132,7 +132,7 @@ class Test_Detach(unittest.TestCase): ...@@ -132,7 +132,7 @@ class Test_Detach(unittest.TestCase):
initializer=paddle.nn.initializer.Constant(7.0) initializer=paddle.nn.initializer.Constant(7.0)
) )
linear1_b_param_attrs = fluid.ParamAttr( linear1_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(8.0) initializer=paddle.nn.initializer.Constant(8.0)
) )
linear1 = Linear( linear1 = Linear(
10, 10,
......
...@@ -58,7 +58,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): ...@@ -58,7 +58,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase):
size=[1000000000, 100000], size=[1000000000, 100000],
param_attr=paddle.fluid.ParamAttr( param_attr=paddle.fluid.ParamAttr(
name="embedding", name="embedding",
initializer=paddle.fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
), ),
is_sparse=True, is_sparse=True,
) )
......
...@@ -89,7 +89,7 @@ class TestDistFleetHeterProgram(unittest.TestCase): ...@@ -89,7 +89,7 @@ class TestDistFleetHeterProgram(unittest.TestCase):
size=[100001, 10], size=[100001, 10],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name="SparseFeatFactors", name="SparseFeatFactors",
initializer=fluid.initializer.Uniform(), initializer=paddle.nn.initializer.Uniform(),
), ),
) )
...@@ -103,8 +103,8 @@ class TestDistFleetHeterProgram(unittest.TestCase): ...@@ -103,8 +103,8 @@ class TestDistFleetHeterProgram(unittest.TestCase):
size=400, size=400,
activation="relu", activation="relu",
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal( initializer=paddle.nn.initializer.Normal(
scale=1 / math.sqrt(concated.shape[1]) std=1 / math.sqrt(concated.shape[1])
) )
), ),
name="fc1", name="fc1",
...@@ -116,8 +116,8 @@ class TestDistFleetHeterProgram(unittest.TestCase): ...@@ -116,8 +116,8 @@ class TestDistFleetHeterProgram(unittest.TestCase):
size=400, size=400,
activation="relu", activation="relu",
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal( initializer=paddle.nn.initializer.Normal(
scale=1 / math.sqrt(fc1.shape[1]) std=1 / math.sqrt(fc1.shape[1])
) )
), ),
name="fc2", name="fc2",
...@@ -129,8 +129,8 @@ class TestDistFleetHeterProgram(unittest.TestCase): ...@@ -129,8 +129,8 @@ class TestDistFleetHeterProgram(unittest.TestCase):
size=400, size=400,
activation="relu", activation="relu",
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal( initializer=paddle.nn.initializer.Normal(
scale=1 / math.sqrt(fc2.shape[1]) std=1 / math.sqrt(fc2.shape[1])
) )
), ),
name="fc3", name="fc3",
...@@ -142,8 +142,8 @@ class TestDistFleetHeterProgram(unittest.TestCase): ...@@ -142,8 +142,8 @@ class TestDistFleetHeterProgram(unittest.TestCase):
size=2, size=2,
activation="softmax", activation="softmax",
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal( initializer=paddle.nn.initializer.Normal(
scale=1 / math.sqrt(fc3.shape[1]) std=1 / math.sqrt(fc3.shape[1])
) )
), ),
) )
......
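The heter-program test pairs a default Uniform embedding initializer with Normal initializers whose std shrinks as 1/sqrt(fan_in) of the previous layer. A compact sketch of that scaling under the new API (dimensions are illustrative):

import math

import paddle

fan_in = 400  # stands in for concated.shape[1], fc1.shape[1], ...
emb_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Uniform())
fc_attr = paddle.ParamAttr(
    initializer=paddle.nn.initializer.Normal(mean=0.0, std=1 / math.sqrt(fan_in))
)
emb = paddle.nn.Embedding(1001, 10, weight_attr=emb_attr)
fc = paddle.nn.Linear(fan_in, 400, weight_attr=fc_attr)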
...@@ -77,7 +77,7 @@ class TestPSMinimize(unittest.TestCase): ...@@ -77,7 +77,7 @@ class TestPSMinimize(unittest.TestCase):
input=q, input=q,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__", name="__emb__",
learning_rate=emb_lr, learning_rate=emb_lr,
), ),
...@@ -91,7 +91,7 @@ class TestPSMinimize(unittest.TestCase): ...@@ -91,7 +91,7 @@ class TestPSMinimize(unittest.TestCase):
x=q_ss, x=q_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__q_fc__", name="__q_fc__",
learning_rate=base_lr, learning_rate=base_lr,
), ),
...@@ -107,7 +107,7 @@ class TestPSMinimize(unittest.TestCase): ...@@ -107,7 +107,7 @@ class TestPSMinimize(unittest.TestCase):
input=pt, input=pt,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__", name="__emb__",
learning_rate=emb_lr, learning_rate=emb_lr,
), ),
...@@ -121,7 +121,7 @@ class TestPSMinimize(unittest.TestCase): ...@@ -121,7 +121,7 @@ class TestPSMinimize(unittest.TestCase):
x=pt_ss, x=pt_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__", name="__fc__",
learning_rate=base_lr, learning_rate=base_lr,
), ),
...@@ -136,7 +136,7 @@ class TestPSMinimize(unittest.TestCase): ...@@ -136,7 +136,7 @@ class TestPSMinimize(unittest.TestCase):
input=nt, input=nt,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__", name="__emb__",
learning_rate=emb_lr, learning_rate=emb_lr,
), ),
...@@ -150,7 +150,7 @@ class TestPSMinimize(unittest.TestCase): ...@@ -150,7 +150,7 @@ class TestPSMinimize(unittest.TestCase):
x=nt_ss, x=nt_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__", name="__fc__",
learning_rate=base_lr, learning_rate=base_lr,
), ),
......
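The remaining simnet/BOW test files all repeat a single pattern: named parameters ("__emb__", "__q_fc__", "__fc__") shared across the query/positive/negative branches, initialized to a constant 0.01, each with its own learning-rate multiplier. A minimal sketch of that ParamAttr (the rates and sizes are placeholders):

import paddle

emb_lr, base_lr = 5.0, 1.0  # stand-ins for the tests' per-parameter rates
emb_attr = paddle.ParamAttr(
    name="__emb__",
    initializer=paddle.nn.initializer.Constant(value=0.01),
    learning_rate=emb_lr,
)
fc_attr = paddle.ParamAttr(
    name="__fc__",
    initializer=paddle.nn.initializer.Constant(value=0.01),
    learning_rate=base_lr,
)
emb = paddle.nn.Embedding(100, 16, weight_attr=emb_attr)
fc = paddle.nn.Linear(16, 32, weight_attr=fc_attr)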
...@@ -78,7 +78,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -78,7 +78,7 @@ class TestPSPassWithBow(unittest.TestCase):
is_distributed=is_distributed, is_distributed=is_distributed,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__", name="__emb__",
learning_rate=emb_lr, learning_rate=emb_lr,
), ),
...@@ -93,7 +93,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -93,7 +93,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=q_ss, x=q_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__q_fc__", name="__q_fc__",
learning_rate=base_lr, learning_rate=base_lr,
), ),
...@@ -110,7 +110,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -110,7 +110,7 @@ class TestPSPassWithBow(unittest.TestCase):
is_distributed=is_distributed, is_distributed=is_distributed,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__", name="__emb__",
learning_rate=emb_lr, learning_rate=emb_lr,
), ),
...@@ -125,7 +125,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -125,7 +125,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=pt_ss, x=pt_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__", name="__fc__",
learning_rate=base_lr, learning_rate=base_lr,
), ),
...@@ -141,7 +141,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -141,7 +141,7 @@ class TestPSPassWithBow(unittest.TestCase):
is_distributed=is_distributed, is_distributed=is_distributed,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__", name="__emb__",
learning_rate=emb_lr, learning_rate=emb_lr,
), ),
...@@ -156,7 +156,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -156,7 +156,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=nt_ss, x=nt_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__", name="__fc__",
learning_rate=base_lr, learning_rate=base_lr,
), ),
......
...@@ -77,7 +77,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -77,7 +77,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=q, input=q,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__", name="__emb__",
learning_rate=emb_lr, learning_rate=emb_lr,
), ),
...@@ -91,7 +91,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -91,7 +91,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=q_ss, x=q_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__q_fc__", name="__q_fc__",
learning_rate=base_lr, learning_rate=base_lr,
), ),
...@@ -107,7 +107,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -107,7 +107,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=pt, input=pt,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__", name="__emb__",
learning_rate=emb_lr, learning_rate=emb_lr,
), ),
...@@ -121,7 +121,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -121,7 +121,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=pt_ss, x=pt_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__", name="__fc__",
learning_rate=base_lr, learning_rate=base_lr,
), ),
...@@ -136,7 +136,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -136,7 +136,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=nt, input=nt,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__", name="__emb__",
learning_rate=emb_lr, learning_rate=emb_lr,
), ),
...@@ -150,7 +150,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -150,7 +150,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=nt_ss, x=nt_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__", name="__fc__",
learning_rate=base_lr, learning_rate=base_lr,
), ),
......
...@@ -80,7 +80,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -80,7 +80,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=q, input=q,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__", name="__emb__",
learning_rate=emb_lr, learning_rate=emb_lr,
), ),
...@@ -94,7 +94,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -94,7 +94,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=q_ss, x=q_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__q_fc__", name="__q_fc__",
learning_rate=base_lr, learning_rate=base_lr,
), ),
...@@ -110,7 +110,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -110,7 +110,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=pt, input=pt,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__", name="__emb__",
learning_rate=emb_lr, learning_rate=emb_lr,
), ),
...@@ -124,7 +124,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -124,7 +124,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=pt_ss, x=pt_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__", name="__fc__",
learning_rate=base_lr, learning_rate=base_lr,
), ),
...@@ -139,7 +139,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -139,7 +139,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=nt, input=nt,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__", name="__emb__",
learning_rate=emb_lr, learning_rate=emb_lr,
), ),
...@@ -153,7 +153,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -153,7 +153,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=nt_ss, x=nt_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__", name="__fc__",
learning_rate=base_lr, learning_rate=base_lr,
), ),
......
...@@ -81,7 +81,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -81,7 +81,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=q, input=q,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__", name="__emb__",
learning_rate=emb_lr, learning_rate=emb_lr,
), ),
...@@ -95,7 +95,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -95,7 +95,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=q_ss, x=q_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__q_fc__", name="__q_fc__",
learning_rate=base_lr, learning_rate=base_lr,
), ),
...@@ -111,7 +111,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -111,7 +111,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=pt, input=pt,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__", name="__emb__",
learning_rate=emb_lr, learning_rate=emb_lr,
), ),
...@@ -125,7 +125,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -125,7 +125,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=pt_ss, x=pt_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__", name="__fc__",
learning_rate=base_lr, learning_rate=base_lr,
), ),
...@@ -140,7 +140,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -140,7 +140,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=nt, input=nt,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__", name="__emb__",
learning_rate=emb_lr, learning_rate=emb_lr,
), ),
...@@ -154,7 +154,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -154,7 +154,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=nt_ss, x=nt_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__", name="__fc__",
learning_rate=base_lr, learning_rate=base_lr,
), ),
......
...@@ -80,7 +80,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -80,7 +80,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=q, input=q,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__", name="__emb__",
learning_rate=emb_lr, learning_rate=emb_lr,
), ),
...@@ -95,7 +95,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -95,7 +95,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=q_ss, x=q_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__q_fc__", name="__q_fc__",
learning_rate=base_lr, learning_rate=base_lr,
), ),
...@@ -111,7 +111,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -111,7 +111,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=pt, input=pt,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__", name="__emb__",
learning_rate=emb_lr, learning_rate=emb_lr,
), ),
...@@ -125,7 +125,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -125,7 +125,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=pt_ss, x=pt_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__", name="__fc__",
learning_rate=base_lr, learning_rate=base_lr,
), ),
...@@ -140,7 +140,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -140,7 +140,7 @@ class TestPSPassWithBow(unittest.TestCase):
input=nt, input=nt,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__", name="__emb__",
learning_rate=emb_lr, learning_rate=emb_lr,
), ),
...@@ -154,7 +154,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -154,7 +154,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=nt_ss, x=nt_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__", name="__fc__",
learning_rate=base_lr, learning_rate=base_lr,
), ),
......
...@@ -78,7 +78,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -78,7 +78,7 @@ class TestPSPassWithBow(unittest.TestCase):
is_distributed=is_distributed, is_distributed=is_distributed,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__", name="__emb__",
learning_rate=emb_lr, learning_rate=emb_lr,
), ),
...@@ -93,7 +93,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -93,7 +93,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=q_ss, x=q_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__q_fc__", name="__q_fc__",
learning_rate=base_lr, learning_rate=base_lr,
), ),
...@@ -110,7 +110,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -110,7 +110,7 @@ class TestPSPassWithBow(unittest.TestCase):
is_distributed=is_distributed, is_distributed=is_distributed,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__", name="__emb__",
learning_rate=emb_lr, learning_rate=emb_lr,
), ),
...@@ -125,7 +125,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -125,7 +125,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=pt_ss, x=pt_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__", name="__fc__",
learning_rate=base_lr, learning_rate=base_lr,
), ),
...@@ -141,7 +141,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -141,7 +141,7 @@ class TestPSPassWithBow(unittest.TestCase):
is_distributed=is_distributed, is_distributed=is_distributed,
size=[dict_dim, emb_dim], size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__emb__", name="__emb__",
learning_rate=emb_lr, learning_rate=emb_lr,
), ),
...@@ -156,7 +156,7 @@ class TestPSPassWithBow(unittest.TestCase): ...@@ -156,7 +156,7 @@ class TestPSPassWithBow(unittest.TestCase):
x=nt_ss, x=nt_ss,
size=hid_dim, size=hid_dim,
weight_attr=fluid.ParamAttr( weight_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01), initializer=paddle.nn.initializer.Constant(value=0.01),
name="__fc__", name="__fc__",
learning_rate=base_lr, learning_rate=base_lr,
), ),
......