PaddlePaddle / DeepSpeech
Commit ab16d8ce
Authored on March 17, 2022 by huangyuxin

    change default initializer to kaiming_uniform, test=asr

Parent: f55fb384

Showing 11 changed files with 423 additions and 19 deletions (+423 -19)
  examples/aishell/asr1/conf/conformer.yaml           +1    -0
  paddlespeech/s2t/__init__.py                         +6    -0
  paddlespeech/s2t/models/u2/u2.py                     +8    -1
  paddlespeech/s2t/modules/attention.py                +1    -1
  paddlespeech/s2t/modules/conformer_convolution.py    +14   -4
  paddlespeech/s2t/modules/decoder.py                  +13   -2
  paddlespeech/s2t/modules/decoder_layer.py            +21   -3
  paddlespeech/s2t/modules/encoder.py                  +8    -1
  paddlespeech/s2t/modules/encoder_layer.py            +35   -7
  paddlespeech/s2t/modules/initializer.py              +272  -0
  paddlespeech/s2t/modules/nets_utils.py               +44   -0
examples/aishell/asr1/conf/conformer.yaml
@@ -37,6 +37,7 @@ model_conf:
     ctc_weight: 0.3
     lsm_weight: 0.1     # label smoothing option
     length_normalized_loss: false
+    init_type: 'kaiming_uniform'
 ###########################################
 # Data                                    #
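A minimal Python sketch of how the new key is consumed (the inline YAML simply mirrors the snippet above and needs PyYAML installed; the dict lookup is the same .get("init_type", None) that U2Model.__init__ performs in the u2.py change below):

import yaml

# model_conf mirrors the conformer.yaml block above (values illustrative)
model_conf = yaml.safe_load("""
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1
    length_normalized_loss: false
    init_type: 'kaiming_uniform'
""")["model_conf"]

init_type = model_conf.get("init_type", None)
print(init_type)  # kaiming_uniform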
paddlespeech/s2t/__init__.py
@@ -21,6 +21,7 @@ from paddle import nn
 from paddle.fluid import core
 from paddle.nn import functional as F
+from paddlespeech.s2t.modules import initializer
 from paddlespeech.s2t.utils.log import Log
 #TODO(Hui Zhang): remove fluid import
@@ -505,3 +506,8 @@ if not hasattr(paddle.nn, 'LayerDict'):
     logger.debug(
         "register user LayerDict to paddle.nn, remove this when fixed!")
     setattr(paddle.nn, 'LayerDict', LayerDict)
+
+"""
+hack KaimingUniform: change limit from np.sqrt(6.0 / float(fan_in)) to np.sqrt(1.0 / float(fan_in))
+"""
+paddle.nn.initializer.KaimingUniform = initializer.KaimingUniform
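The hack noted above is the substance of the new initializer module: Paddle's stock KaimingUniform samples from [-sqrt(6/fan_in), sqrt(6/fan_in)], while the patched class narrows the bound to sqrt(1/fan_in). A small numeric sketch of the difference (the fan_in value is illustrative):

import numpy as np

fan_in = 256
stock_limit = np.sqrt(6.0 / float(fan_in))    # Paddle's default KaimingUniform bound
patched_limit = np.sqrt(1.0 / float(fan_in))  # bound used by the patched class
print(stock_limit, patched_limit)             # ~0.153 vs 0.0625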
paddlespeech/s2t/models/u2/u2.py
@@ -41,6 +41,7 @@ from paddlespeech.s2t.modules.mask import make_pad_mask
 from paddlespeech.s2t.modules.mask import mask_finished_preds
 from paddlespeech.s2t.modules.mask import mask_finished_scores
 from paddlespeech.s2t.modules.mask import subsequent_mask
+from paddlespeech.s2t.modules.nets_utils import initialize
 from paddlespeech.s2t.utils import checkpoint
 from paddlespeech.s2t.utils import layer_tools
 from paddlespeech.s2t.utils.ctc_utils import remove_duplicates_and_blank
@@ -72,6 +73,7 @@ class U2BaseModel(ASRInterface, nn.Layer):
         assert 0.0 <= ctc_weight <= 1.0, ctc_weight

         nn.Layer.__init__(self)
         # note that eos is the same as sos (equivalent ID)
         self.sos = vocab_size - 1
         self.eos = vocab_size - 1
@@ -780,9 +782,14 @@ class U2DecodeModel(U2BaseModel):
 class U2Model(U2DecodeModel):
     def __init__(self, configs: dict):
+        model_conf = configs.get('model_conf', dict())
+        init_type = model_conf.get("init_type", None)
+        if init_type is not None:
+            logger.info(f"Use {init_type} initializer as default initializer")
+            initialize(self, init_type)
         vocab_size, encoder, decoder, ctc = U2Model._init_from_config(configs)
+        nn.initializer.set_global_initializer(None)

-        model_conf = configs.get('model_conf', dict())
         super().__init__(
             vocab_size=vocab_size,
             encoder=encoder,
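The flow added to U2Model.__init__ uses Paddle's global-initializer mechanism: install a default, build the submodules under it, then reset it so later code is unaffected. A minimal stand-alone sketch of that pattern (plain Paddle; the layer size is illustrative):

import paddle
from paddle import nn

# Install KaimingUniform as the global default for weights and biases, the same
# call nets_utils.initialize() makes for init_type == 'kaiming_uniform'.
nn.initializer.set_global_initializer(nn.initializer.KaimingUniform(),
                                      nn.initializer.KaimingUniform())
proj = nn.Linear(256, 256)                    # parameters drawn under the global default
nn.initializer.set_global_initializer(None)   # restore Paddle's built-in defaults
print(float(proj.weight.abs().max()))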
paddlespeech/s2t/modules/attention.py
(+1 -1; diff not expanded in this view)
paddlespeech/s2t/modules/conformer_convolution.py
@@ -87,10 +87,20 @@ class ConvolutionModule(nn.Layer):
         assert norm in ['batch_norm', 'layer_norm']
         if norm == "batch_norm":
             self.use_layer_norm = False
-            self.norm = nn.BatchNorm1D(channels)
+            self.norm = nn.BatchNorm1D(
+                channels,
+                weight_attr=paddle.ParamAttr(
+                    initializer=nn.initializer.Constant(1.0)),
+                bias_attr=paddle.ParamAttr(
+                    initializer=nn.initializer.Constant(0.0)))
         else:
             self.use_layer_norm = True
-            self.norm = nn.LayerNorm(channels)
+            self.norm = nn.LayerNorm(
+                channels,
+                weight_attr=paddle.ParamAttr(
+                    initializer=nn.initializer.Constant(1.0)),
+                bias_attr=paddle.ParamAttr(
+                    initializer=nn.initializer.Constant(0.0)))

         self.pointwise_conv2 = nn.Conv1D(
             channels,
             channels,
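The reason the norm layers are rebuilt with explicit Constant(1.0)/Constant(0.0) attributes: once a global initializer is installed, Paddle applies it to every parameter whose ParamAttr does not name its own initializer, which would also randomize BatchNorm/LayerNorm scale and shift. A small sketch of the contrast (plain Paddle, shapes illustrative; exact precedence rules may differ across Paddle releases):

import paddle
from paddle import nn

nn.initializer.set_global_initializer(nn.initializer.KaimingUniform(),
                                      nn.initializer.KaimingUniform())

ln_default = nn.LayerNorm(8)   # scale/shift fall back to the global default
ln_pinned = nn.LayerNorm(
    8,
    weight_attr=paddle.ParamAttr(initializer=nn.initializer.Constant(1.0)),
    bias_attr=paddle.ParamAttr(initializer=nn.initializer.Constant(0.0)))

nn.initializer.set_global_initializer(None)
print(ln_default.weight.numpy())   # random values drawn from KaimingUniform
print(ln_pinned.weight.numpy())    # all ones, the usual LayerNorm scale init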
paddlespeech/s2t/modules/decoder.py
@@ -76,19 +76,30 @@ class TransformerDecoder(BatchScorerInterface, nn.Layer):
             concat_after: bool=False, ):

         assert check_argument_types()

         nn.Layer.__init__(self)
         self.selfattention_layer_type = 'selfattn'
         attention_dim = encoder_output_size

         if input_layer == "embed":
             self.embed = nn.Sequential(
-                nn.Embedding(vocab_size, attention_dim),
+                nn.Embedding(
+                    vocab_size,
+                    attention_dim,
+                    weight_attr=paddle.ParamAttr(
+                        initializer=nn.initializer.Normal())),
                 PositionalEncoding(attention_dim, positional_dropout_rate), )
         else:
             raise ValueError(f"only 'embed' is supported: {input_layer}")

         self.normalize_before = normalize_before
-        self.after_norm = nn.LayerNorm(attention_dim, epsilon=1e-12)
+        self.after_norm = nn.LayerNorm(
+            attention_dim,
+            epsilon=1e-12,
+            weight_attr=paddle.ParamAttr(
+                initializer=nn.initializer.Constant(1.0)),
+            bias_attr=paddle.ParamAttr(
+                initializer=nn.initializer.Constant(0.0)))
         self.use_output_layer = use_output_layer
         self.output_layer = nn.Linear(attention_dim, vocab_size)
paddlespeech/s2t/modules/decoder_layer.py
@@ -62,9 +62,27 @@ class DecoderLayer(nn.Layer):
         self.self_attn = self_attn
         self.src_attn = src_attn
         self.feed_forward = feed_forward
-        self.norm1 = nn.LayerNorm(size, epsilon=1e-12)
-        self.norm2 = nn.LayerNorm(size, epsilon=1e-12)
-        self.norm3 = nn.LayerNorm(size, epsilon=1e-12)
+        self.norm1 = nn.LayerNorm(
+            size,
+            epsilon=1e-12,
+            weight_attr=paddle.ParamAttr(
+                initializer=nn.initializer.Constant(1.0)),
+            bias_attr=paddle.ParamAttr(
+                initializer=nn.initializer.Constant(0.0)))
+        self.norm2 = nn.LayerNorm(
+            size,
+            epsilon=1e-12,
+            weight_attr=paddle.ParamAttr(
+                initializer=nn.initializer.Constant(1.0)),
+            bias_attr=paddle.ParamAttr(
+                initializer=nn.initializer.Constant(0.0)))
+        self.norm3 = nn.LayerNorm(
+            size,
+            epsilon=1e-12,
+            weight_attr=paddle.ParamAttr(
+                initializer=nn.initializer.Constant(1.0)),
+            bias_attr=paddle.ParamAttr(
+                initializer=nn.initializer.Constant(0.0)))
         self.dropout = nn.Dropout(dropout_rate)
         self.normalize_before = normalize_before
         self.concat_after = concat_after
paddlespeech/s2t/modules/encoder.py
@@ -129,7 +129,13 @@ class BaseEncoder(nn.Layer):
                 d_model=output_size, dropout_rate=positional_dropout_rate), )

         self.normalize_before = normalize_before
-        self.after_norm = nn.LayerNorm(output_size, epsilon=1e-12)
+        self.after_norm = nn.LayerNorm(
+            output_size,
+            epsilon=1e-12,
+            weight_attr=paddle.ParamAttr(
+                initializer=nn.initializer.Constant(1.0)),
+            bias_attr=paddle.ParamAttr(
+                initializer=nn.initializer.Constant(0.0)))
         self.static_chunk_size = static_chunk_size
         self.use_dynamic_chunk = use_dynamic_chunk
         self.use_dynamic_left_chunk = use_dynamic_left_chunk
@@ -457,6 +463,7 @@ class ConformerEncoder(BaseEncoder):
             cnn_module_norm (str): cnn conv norm type, Optional['batch_norm','layer_norm']
         """
         assert check_argument_types()
         super().__init__(input_size, output_size, attention_heads, linear_units,
                          num_blocks, dropout_rate, positional_dropout_rate,
                          attention_dropout_rate, input_layer,
paddlespeech/s2t/modules/encoder_layer.py
@@ -174,18 +174,46 @@ class ConformerEncoderLayer(nn.Layer):
         self.feed_forward = feed_forward
         self.feed_forward_macaron = feed_forward_macaron
         self.conv_module = conv_module
-        self.norm_ff = nn.LayerNorm(size, epsilon=1e-12)  # for the FNN module
-        self.norm_mha = nn.LayerNorm(size, epsilon=1e-12)  # for the MHA module
+        self.norm_ff = nn.LayerNorm(
+            size,
+            epsilon=1e-12,
+            weight_attr=paddle.ParamAttr(
+                initializer=nn.initializer.Constant(1.0)),
+            bias_attr=paddle.ParamAttr(
+                initializer=nn.initializer.Constant(0.0)))  # for the FNN module
+        self.norm_mha = nn.LayerNorm(
+            size,
+            epsilon=1e-12,
+            weight_attr=paddle.ParamAttr(
+                initializer=nn.initializer.Constant(1.0)),
+            bias_attr=paddle.ParamAttr(
+                initializer=nn.initializer.Constant(0.0)))  # for the MHA module
         if feed_forward_macaron is not None:
-            self.norm_ff_macaron = nn.LayerNorm(size, epsilon=1e-12)
+            self.norm_ff_macaron = nn.LayerNorm(
+                size,
+                epsilon=1e-12,
+                weight_attr=paddle.ParamAttr(
+                    initializer=nn.initializer.Constant(1.0)),
+                bias_attr=paddle.ParamAttr(
+                    initializer=nn.initializer.Constant(0.0)))
             self.ff_scale = 0.5
         else:
             self.ff_scale = 1.0
         if self.conv_module is not None:
-            self.norm_conv = nn.LayerNorm(size, epsilon=1e-12)  # for the CNN module
-            self.norm_final = nn.LayerNorm(size, epsilon=1e-12)  # for the final output of the block
+            self.norm_conv = nn.LayerNorm(
+                size,
+                epsilon=1e-12,
+                weight_attr=paddle.ParamAttr(
+                    initializer=nn.initializer.Constant(1.0)),
+                bias_attr=paddle.ParamAttr(
+                    initializer=nn.initializer.Constant(0.0)))  # for the CNN module
+            self.norm_final = nn.LayerNorm(
+                size,
+                epsilon=1e-12,
+                weight_attr=paddle.ParamAttr(
+                    initializer=nn.initializer.Constant(1.0)),
+                bias_attr=paddle.ParamAttr(
+                    initializer=nn.initializer.Constant(0.0)))  # for the final output of the block
         self.dropout = nn.Dropout(dropout_rate)
         self.size = size
         self.normalize_before = normalize_before
paddlespeech/s2t/modules/initializer.py (new file, mode 100644)

# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import numpy as np
from paddle.fluid import framework
from paddle.fluid import unique_name
from paddle.fluid.core import VarDesc
from paddle.fluid.framework import default_main_program
from paddle.fluid.framework import in_dygraph_mode

__all__ = ['MSRAInitializer']


class Initializer(object):
    """Base class for variable initializers.

    Defines the common interface of variable initializers. They add
    operations to the init program that are used to initialize variables.
    Users should not use this class directly, but need to use one of its
    implementations.
    """

    def __init__(self):
        pass

    def __call__(self, param, block=None):
        """Add corresponding initialization operations to the network."""
        raise NotImplementedError()

    def _check_block(self, block):
        if block is None:
            block = default_main_program().global_block()
        return block

    def _compute_fans(self, var):
        """Compute the fan_in and the fan_out for layers.

        This method computes the fan_in and the fan_out for neural network
        layers, if not specified. It is not possible to perfectly estimate
        fan_in and fan_out. This method will estimate it correctly for
        matrix multiply and convolutions.

        Args:
            var: variable for which fan_in and fan_out have to be computed.

        Returns:
            tuple of two integers (fan_in, fan_out).
        """
        shape = var.shape
        if not shape or len(shape) == 0:
            fan_in = fan_out = 1
        elif len(shape) == 1:
            fan_in = fan_out = shape[0]
        elif len(shape) == 2:
            # This is the case for simple matrix multiply
            fan_in = shape[0]
            fan_out = shape[1]
        else:
            # Assume this to be a convolutional kernel.
            # In PaddlePaddle, the shape of the kernel is like:
            # [num_filters, num_filter_channels, ...] where the remaining
            # dimensions are the filter_size.
            receptive_field_size = np.prod(shape[2:])
            fan_in = shape[1] * receptive_field_size
            fan_out = shape[0] * receptive_field_size

        return (fan_in, fan_out)


class MSRAInitializer(Initializer):
    r"""Implements the MSRA initializer a.k.a. Kaiming Initializer.

    This class implements the weight initialization from the paper
    `Delving Deep into Rectifiers: Surpassing Human-Level Performance on
    ImageNet Classification <https://arxiv.org/abs/1502.01852>`_
    by Kaiming He, Xiangyu Zhang, Shaoqing Ren and Jian Sun. This is a
    robust initialization method that particularly considers the rectifier
    nonlinearities. In case of Uniform distribution, the range is [-x, x],
    where

    .. math::

        x = \sqrt{\frac{6.0}{fan\_in}}

    In case of Normal distribution, the mean is 0 and the standard deviation
    is

    .. math::

        \sqrt{\frac{2.0}{fan\_in}}

    Args:
        uniform (bool): whether to use uniform or normal distribution
        fan_in (float32|None): fan_in for MSRAInitializer. If None, it is
            inferred from the variable. Default is None.
        seed (int32): random seed

    Note:
        It is recommended to set fan_in to None for most cases.

    Examples:
        .. code-block:: python

            import paddle
            import paddle.fluid as fluid
            paddle.enable_static()
            x = fluid.data(name="data", shape=[8, 32, 32], dtype="float32")
            fc = fluid.layers.fc(input=x, size=10,
                param_attr=fluid.initializer.MSRA(uniform=False))
    """

    def __init__(self, uniform=True, fan_in=None, seed=0):
        """Constructor for MSRAInitializer."""
        assert uniform is not None
        assert seed is not None
        super(MSRAInitializer, self).__init__()
        self._uniform = uniform
        self._fan_in = fan_in
        self._seed = seed

    def __call__(self, var, block=None):
        """Initialize the input tensor with MSRA initialization.

        Args:
            var(Tensor): Tensor that needs to be initialized.
            block(Block, optional): The block in which initialization ops
                should be added. Used in static graph only, default None.

        Returns:
            The initialization op.
        """
        block = self._check_block(block)

        assert isinstance(var, framework.Variable)
        assert isinstance(block, framework.Block)
        f_in, f_out = self._compute_fans(var)

        # If fan_in is passed, use it
        fan_in = f_in if self._fan_in is None else self._fan_in

        if self._seed == 0:
            self._seed = block.program.random_seed

        # to be compatible with fp16 initializers
        if var.dtype == VarDesc.VarType.FP16 or (
                var.dtype == VarDesc.VarType.BF16 and not self._uniform):
            out_dtype = VarDesc.VarType.FP32
            out_var = block.create_var(
                name=unique_name.generate(".".join(
                    ['masra_init', var.name, 'tmp'])),
                shape=var.shape,
                dtype=out_dtype,
                type=VarDesc.VarType.LOD_TENSOR,
                persistable=False)
        else:
            out_dtype = var.dtype
            out_var = var

        if self._uniform:
            limit = np.sqrt(1.0 / float(fan_in))
            op = block.append_op(
                type="uniform_random",
                inputs={},
                outputs={"Out": out_var},
                attrs={
                    "shape": out_var.shape,
                    "dtype": int(out_dtype),
                    "min": -limit,
                    "max": limit,
                    "seed": self._seed
                },
                stop_gradient=True)
        else:
            std = np.sqrt(2.0 / float(fan_in))
            op = block.append_op(
                type="gaussian_random",
                outputs={"Out": out_var},
                attrs={
                    "shape": out_var.shape,
                    "dtype": int(out_dtype),
                    "mean": 0.0,
                    "std": std,
                    "seed": self._seed
                },
                stop_gradient=True)

        if var.dtype == VarDesc.VarType.FP16 or (
                var.dtype == VarDesc.VarType.BF16 and not self._uniform):
            block.append_op(
                type="cast",
                inputs={"X": out_var},
                outputs={"Out": var},
                attrs={"in_dtype": out_var.dtype,
                       "out_dtype": var.dtype})

        if not framework.in_dygraph_mode():
            var.op = op
        return op


class KaimingUniform(MSRAInitializer):
    r"""Implements the Kaiming Uniform initializer.

    This class implements the weight initialization from the paper
    `Delving Deep into Rectifiers: Surpassing Human-Level Performance on
    ImageNet Classification <https://arxiv.org/abs/1502.01852>`_
    by Kaiming He, Xiangyu Zhang, Shaoqing Ren and Jian Sun. This is a
    robust initialization method that particularly considers the rectifier
    nonlinearities.

    In case of Uniform distribution, the range is [-x, x], where

    .. math::

        x = \sqrt{\frac{6.0}{fan\_in}}

    Args:
        fan_in (float32|None): fan_in for Kaiming uniform Initializer. If None,
            it is inferred from the variable. Default is None.

    Note:
        It is recommended to set fan_in to None for most cases.

    Examples:
        .. code-block:: python

            import paddle
            import paddle.nn as nn

            linear = nn.Linear(2,
                               4,
                               weight_attr=nn.initializer.KaimingUniform())
            data = paddle.rand([30, 10, 2], dtype='float32')
            res = linear(data)
    """

    def __init__(self, fan_in=None):
        super(KaimingUniform, self).__init__(
            uniform=True, fan_in=fan_in, seed=0)


# We shorten the class name, since users will use the initializer with the
# package name. The sample code:
#
#   import paddle.fluid as fluid
#
#   hidden = fluid.layers.fc(...,
#                            param_attr=ParamAttr(fluid.initializer.Xavier()))
#
# There is no need to add an `Initializer` suffix to the class name.
MSRA = MSRAInitializer
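How the new class is wired in: the assignment at the end of paddlespeech/s2t/__init__.py (shown earlier) replaces paddle.nn.initializer.KaimingUniform with this module's variant, so existing call sites keep working but draw from the tighter sqrt(1/fan_in) bound. A minimal check, assuming paddlespeech at this commit is importable:

import paddle
import paddlespeech.s2t  # noqa: F401  - importing the package runs the monkeypatch
from paddlespeech.s2t.modules import initializer

print(paddle.nn.initializer.KaimingUniform is initializer.KaimingUniform)  # True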
paddlespeech/s2t/modules/nets_utils.py (new file, mode 100644)

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
from paddle import nn
from typeguard import check_argument_types


def initialize(model: nn.Layer, init: str):
    """Initialize weights of a neural network module.

    Parameters are initialized using the given method or distribution.
    Custom initialization routines can be implemented in submodules.

    Args:
        model (nn.Layer): Target.
        init (str): Method of initialization.
    """
    assert check_argument_types()

    if init == "xavier_uniform":
        nn.initializer.set_global_initializer(nn.initializer.XavierUniform(),
                                              nn.initializer.Constant())
    elif init == "xavier_normal":
        nn.initializer.set_global_initializer(nn.initializer.XavierNormal(),
                                              nn.initializer.Constant())
    elif init == "kaiming_uniform":
        nn.initializer.set_global_initializer(nn.initializer.KaimingUniform(),
                                              nn.initializer.KaimingUniform())
    elif init == "kaiming_normal":
        nn.initializer.set_global_initializer(nn.initializer.KaimingNormal(),
                                              nn.initializer.Constant())
    else:
        raise ValueError("Unknown initialization: " + init)
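A minimal usage sketch of the new helper, mirroring what U2Model.__init__ does (assumes paddlespeech at this commit is importable; the layer size is illustrative): call initialize() before creating submodules, then reset the global initializer afterwards.

import paddle
from paddle import nn

from paddlespeech.s2t.modules.nets_utils import initialize


class TinyModel(nn.Layer):
    def __init__(self):
        super().__init__()
        initialize(self, "kaiming_uniform")            # install the global default
        self.proj = nn.Linear(128, 128)                # built under KaimingUniform
        nn.initializer.set_global_initializer(None)    # restore Paddle defaults


model = TinyModel()
print(float(model.proj.weight.abs().max()))  # bounded by sqrt(1/128) after the hack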