Commit 28658cc1 authored by Hui Zhang

fix cmvn and print params

Parent 48f4bda3
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "comic-scotland",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/workspace/DeepSpeech-2.x\n"
]
},
{
"data": {
"text/plain": [
"'/workspace/DeepSpeech-2.x'"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%cd ..\n",
"%pwd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "trying-palestinian",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/fluid/layers/utils.py:26: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
" def convert_to_list(value, n, name, dtype=np.int):\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:93] register user softmax to paddle, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:97] register user log_softmax to paddle, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:101] register user sigmoid to paddle, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:105] register user log_sigmoid to paddle, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:109] register user relu to paddle, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:119] override cat of paddle if exists or register, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:133] override item of paddle.Tensor if exists or register, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:144] override long of paddle.Tensor if exists or register, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:164] override new_full of paddle.Tensor if exists or register, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:179] override eq of paddle.Tensor if exists or register, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:185] override eq of paddle if exists or register, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:195] override contiguous of paddle.Tensor if exists or register, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:212] override size of paddle.Tensor (`to_static` do not process `size` property, maybe some `paddle` api dependent on it), remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:223] register user view to paddle.Tensor, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:233] register user view_as to paddle.Tensor, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:259] register user masked_fill to paddle.Tensor, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:277] register user masked_fill_ to paddle.Tensor, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:288] register user fill_ to paddle.Tensor, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:298] register user repeat to paddle.Tensor, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:303] register user softmax to paddle.Tensor, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:308] register user sigmoid to paddle.Tensor, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:312] register user relu to paddle.Tensor, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:322] register user type_as to paddle.Tensor, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:337] register user to to paddle.Tensor, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:346] register user float to paddle.Tensor, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:356] register user tolist to paddle.Tensor, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:371] register user glu to paddle.nn.functional, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:422] override ctc_loss of paddle.nn.functional if exists, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:428] register user Module to paddle.nn, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:434] register user ModuleList to paddle.nn, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:450] register user GLU to paddle.nn, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:483] register user ConstantPad2d to paddle.nn, remove this when fixed!\n",
"[WARNING 2021/04/16 08:20:33 __init__.py:489] register user export to paddle.jit, remove this when fixed!\n"
]
}
],
"source": [
"import numpy as np\n",
"import paddle\n",
"from yacs.config import CfgNode as CN\n",
"\n",
"from deepspeech.models.u2 import U2Model\n",
"from deepspeech.utils.layer_tools import print_params\n",
"from deepspeech.utils.layer_tools import summary"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "committed-glance",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
" and should_run_async(code)\n",
"[INFO 2021/04/16 08:20:34 u2.py:834] U2 Encoder type: conformer\n",
"[INFO 2021/04/16 08:20:34 u2.py:834] U2 Encoder type: conformer\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"encoder.embed.conv.0.weight | [256, 1, 3, 3] | 2304 | True\n",
"encoder.embed.conv.0.bias | [256] | 256 | True\n",
"encoder.embed.conv.2.weight | [256, 256, 3, 3] | 589824 | True\n",
"encoder.embed.conv.2.bias | [256] | 256 | True\n",
"encoder.embed.linear.weight | [4864, 256] | 1245184 | True\n",
"encoder.embed.linear.bias | [256] | 256 | True\n",
"encoder.after_norm.weight | [256] | 256 | True\n",
"encoder.after_norm.bias | [256] | 256 | True\n",
"encoder.encoders.0.self_attn.pos_bias_u | [4, 64] | 256 | True\n",
"encoder.encoders.0.self_attn.pos_bias_v | [4, 64] | 256 | True\n",
"encoder.encoders.0.self_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.0.self_attn.linear_q.bias | [256] | 256 | True\n",
"encoder.encoders.0.self_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.0.self_attn.linear_k.bias | [256] | 256 | True\n",
"encoder.encoders.0.self_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.0.self_attn.linear_v.bias | [256] | 256 | True\n",
"encoder.encoders.0.self_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.0.self_attn.linear_out.bias | [256] | 256 | True\n",
"encoder.encoders.0.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.0.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.0.feed_forward.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.0.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.0.feed_forward.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.0.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.0.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.0.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.0.feed_forward_macaron.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.0.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n",
"encoder.encoders.0.conv_module.pointwise_conv1.bias | [512] | 512 | True\n",
"encoder.encoders.0.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n",
"encoder.encoders.0.conv_module.depthwise_conv.bias | [256] | 256 | True\n",
"encoder.encoders.0.conv_module.norm.weight | [256] | 256 | True\n",
"encoder.encoders.0.conv_module.norm.bias | [256] | 256 | True\n",
"encoder.encoders.0.conv_module.norm._mean | [256] | 256 | False\n",
"encoder.encoders.0.conv_module.norm._variance | [256] | 256 | False\n",
"encoder.encoders.0.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n",
"encoder.encoders.0.conv_module.pointwise_conv2.bias | [256] | 256 | True\n",
"encoder.encoders.0.norm_ff.weight | [256] | 256 | True\n",
"encoder.encoders.0.norm_ff.bias | [256] | 256 | True\n",
"encoder.encoders.0.norm_mha.weight | [256] | 256 | True\n",
"encoder.encoders.0.norm_mha.bias | [256] | 256 | True\n",
"encoder.encoders.0.norm_ff_macaron.weight | [256] | 256 | True\n",
"encoder.encoders.0.norm_ff_macaron.bias | [256] | 256 | True\n",
"encoder.encoders.0.norm_conv.weight | [256] | 256 | True\n",
"encoder.encoders.0.norm_conv.bias | [256] | 256 | True\n",
"encoder.encoders.0.norm_final.weight | [256] | 256 | True\n",
"encoder.encoders.0.norm_final.bias | [256] | 256 | True\n",
"encoder.encoders.0.concat_linear.weight | [512, 256] | 131072 | True\n",
"encoder.encoders.0.concat_linear.bias | [256] | 256 | True\n",
"encoder.encoders.1.self_attn.pos_bias_u | [4, 64] | 256 | True\n",
"encoder.encoders.1.self_attn.pos_bias_v | [4, 64] | 256 | True\n",
"encoder.encoders.1.self_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.1.self_attn.linear_q.bias | [256] | 256 | True\n",
"encoder.encoders.1.self_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.1.self_attn.linear_k.bias | [256] | 256 | True\n",
"encoder.encoders.1.self_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.1.self_attn.linear_v.bias | [256] | 256 | True\n",
"encoder.encoders.1.self_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.1.self_attn.linear_out.bias | [256] | 256 | True\n",
"encoder.encoders.1.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.1.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.1.feed_forward.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.1.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.1.feed_forward.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.1.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.1.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.1.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.1.feed_forward_macaron.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.1.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n",
"encoder.encoders.1.conv_module.pointwise_conv1.bias | [512] | 512 | True\n",
"encoder.encoders.1.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n",
"encoder.encoders.1.conv_module.depthwise_conv.bias | [256] | 256 | True\n",
"encoder.encoders.1.conv_module.norm.weight | [256] | 256 | True\n",
"encoder.encoders.1.conv_module.norm.bias | [256] | 256 | True\n",
"encoder.encoders.1.conv_module.norm._mean | [256] | 256 | False\n",
"encoder.encoders.1.conv_module.norm._variance | [256] | 256 | False\n",
"encoder.encoders.1.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n",
"encoder.encoders.1.conv_module.pointwise_conv2.bias | [256] | 256 | True\n",
"encoder.encoders.1.norm_ff.weight | [256] | 256 | True\n",
"encoder.encoders.1.norm_ff.bias | [256] | 256 | True\n",
"encoder.encoders.1.norm_mha.weight | [256] | 256 | True\n",
"encoder.encoders.1.norm_mha.bias | [256] | 256 | True\n",
"encoder.encoders.1.norm_ff_macaron.weight | [256] | 256 | True\n",
"encoder.encoders.1.norm_ff_macaron.bias | [256] | 256 | True\n",
"encoder.encoders.1.norm_conv.weight | [256] | 256 | True\n",
"encoder.encoders.1.norm_conv.bias | [256] | 256 | True\n",
"encoder.encoders.1.norm_final.weight | [256] | 256 | True\n",
"encoder.encoders.1.norm_final.bias | [256] | 256 | True\n",
"encoder.encoders.1.concat_linear.weight | [512, 256] | 131072 | True\n",
"encoder.encoders.1.concat_linear.bias | [256] | 256 | True\n",
"encoder.encoders.2.self_attn.pos_bias_u | [4, 64] | 256 | True\n",
"encoder.encoders.2.self_attn.pos_bias_v | [4, 64] | 256 | True\n",
"encoder.encoders.2.self_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.2.self_attn.linear_q.bias | [256] | 256 | True\n",
"encoder.encoders.2.self_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.2.self_attn.linear_k.bias | [256] | 256 | True\n",
"encoder.encoders.2.self_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.2.self_attn.linear_v.bias | [256] | 256 | True\n",
"encoder.encoders.2.self_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.2.self_attn.linear_out.bias | [256] | 256 | True\n",
"encoder.encoders.2.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.2.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.2.feed_forward.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.2.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.2.feed_forward.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.2.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.2.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.2.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.2.feed_forward_macaron.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.2.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n",
"encoder.encoders.2.conv_module.pointwise_conv1.bias | [512] | 512 | True\n",
"encoder.encoders.2.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n",
"encoder.encoders.2.conv_module.depthwise_conv.bias | [256] | 256 | True\n",
"encoder.encoders.2.conv_module.norm.weight | [256] | 256 | True\n",
"encoder.encoders.2.conv_module.norm.bias | [256] | 256 | True\n",
"encoder.encoders.2.conv_module.norm._mean | [256] | 256 | False\n",
"encoder.encoders.2.conv_module.norm._variance | [256] | 256 | False\n",
"encoder.encoders.2.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n",
"encoder.encoders.2.conv_module.pointwise_conv2.bias | [256] | 256 | True\n",
"encoder.encoders.2.norm_ff.weight | [256] | 256 | True\n",
"encoder.encoders.2.norm_ff.bias | [256] | 256 | True\n",
"encoder.encoders.2.norm_mha.weight | [256] | 256 | True\n",
"encoder.encoders.2.norm_mha.bias | [256] | 256 | True\n",
"encoder.encoders.2.norm_ff_macaron.weight | [256] | 256 | True\n",
"encoder.encoders.2.norm_ff_macaron.bias | [256] | 256 | True\n",
"encoder.encoders.2.norm_conv.weight | [256] | 256 | True\n",
"encoder.encoders.2.norm_conv.bias | [256] | 256 | True\n",
"encoder.encoders.2.norm_final.weight | [256] | 256 | True\n",
"encoder.encoders.2.norm_final.bias | [256] | 256 | True\n",
"encoder.encoders.2.concat_linear.weight | [512, 256] | 131072 | True\n",
"encoder.encoders.2.concat_linear.bias | [256] | 256 | True\n",
"encoder.encoders.3.self_attn.pos_bias_u | [4, 64] | 256 | True\n",
"encoder.encoders.3.self_attn.pos_bias_v | [4, 64] | 256 | True\n",
"encoder.encoders.3.self_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.3.self_attn.linear_q.bias | [256] | 256 | True\n",
"encoder.encoders.3.self_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.3.self_attn.linear_k.bias | [256] | 256 | True\n",
"encoder.encoders.3.self_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.3.self_attn.linear_v.bias | [256] | 256 | True\n",
"encoder.encoders.3.self_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.3.self_attn.linear_out.bias | [256] | 256 | True\n",
"encoder.encoders.3.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.3.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.3.feed_forward.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.3.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.3.feed_forward.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.3.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.3.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.3.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.3.feed_forward_macaron.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.3.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n",
"encoder.encoders.3.conv_module.pointwise_conv1.bias | [512] | 512 | True\n",
"encoder.encoders.3.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n",
"encoder.encoders.3.conv_module.depthwise_conv.bias | [256] | 256 | True\n",
"encoder.encoders.3.conv_module.norm.weight | [256] | 256 | True\n",
"encoder.encoders.3.conv_module.norm.bias | [256] | 256 | True\n",
"encoder.encoders.3.conv_module.norm._mean | [256] | 256 | False\n",
"encoder.encoders.3.conv_module.norm._variance | [256] | 256 | False\n",
"encoder.encoders.3.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n",
"encoder.encoders.3.conv_module.pointwise_conv2.bias | [256] | 256 | True\n",
"encoder.encoders.3.norm_ff.weight | [256] | 256 | True\n",
"encoder.encoders.3.norm_ff.bias | [256] | 256 | True\n",
"encoder.encoders.3.norm_mha.weight | [256] | 256 | True\n",
"encoder.encoders.3.norm_mha.bias | [256] | 256 | True\n",
"encoder.encoders.3.norm_ff_macaron.weight | [256] | 256 | True\n",
"encoder.encoders.3.norm_ff_macaron.bias | [256] | 256 | True\n",
"encoder.encoders.3.norm_conv.weight | [256] | 256 | True\n",
"encoder.encoders.3.norm_conv.bias | [256] | 256 | True\n",
"encoder.encoders.3.norm_final.weight | [256] | 256 | True\n",
"encoder.encoders.3.norm_final.bias | [256] | 256 | True\n",
"encoder.encoders.3.concat_linear.weight | [512, 256] | 131072 | True\n",
"encoder.encoders.3.concat_linear.bias | [256] | 256 | True\n",
"encoder.encoders.4.self_attn.pos_bias_u | [4, 64] | 256 | True\n",
"encoder.encoders.4.self_attn.pos_bias_v | [4, 64] | 256 | True\n",
"encoder.encoders.4.self_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.4.self_attn.linear_q.bias | [256] | 256 | True\n",
"encoder.encoders.4.self_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.4.self_attn.linear_k.bias | [256] | 256 | True\n",
"encoder.encoders.4.self_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.4.self_attn.linear_v.bias | [256] | 256 | True\n",
"encoder.encoders.4.self_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.4.self_attn.linear_out.bias | [256] | 256 | True\n",
"encoder.encoders.4.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.4.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.4.feed_forward.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.4.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.4.feed_forward.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.4.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.4.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.4.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.4.feed_forward_macaron.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.4.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n",
"encoder.encoders.4.conv_module.pointwise_conv1.bias | [512] | 512 | True\n",
"encoder.encoders.4.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n",
"encoder.encoders.4.conv_module.depthwise_conv.bias | [256] | 256 | True\n",
"encoder.encoders.4.conv_module.norm.weight | [256] | 256 | True\n",
"encoder.encoders.4.conv_module.norm.bias | [256] | 256 | True\n",
"encoder.encoders.4.conv_module.norm._mean | [256] | 256 | False\n",
"encoder.encoders.4.conv_module.norm._variance | [256] | 256 | False\n",
"encoder.encoders.4.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n",
"encoder.encoders.4.conv_module.pointwise_conv2.bias | [256] | 256 | True\n",
"encoder.encoders.4.norm_ff.weight | [256] | 256 | True\n",
"encoder.encoders.4.norm_ff.bias | [256] | 256 | True\n",
"encoder.encoders.4.norm_mha.weight | [256] | 256 | True\n",
"encoder.encoders.4.norm_mha.bias | [256] | 256 | True\n",
"encoder.encoders.4.norm_ff_macaron.weight | [256] | 256 | True\n",
"encoder.encoders.4.norm_ff_macaron.bias | [256] | 256 | True\n",
"encoder.encoders.4.norm_conv.weight | [256] | 256 | True\n",
"encoder.encoders.4.norm_conv.bias | [256] | 256 | True\n",
"encoder.encoders.4.norm_final.weight | [256] | 256 | True\n",
"encoder.encoders.4.norm_final.bias | [256] | 256 | True\n",
"encoder.encoders.4.concat_linear.weight | [512, 256] | 131072 | True\n",
"encoder.encoders.4.concat_linear.bias | [256] | 256 | True\n",
"encoder.encoders.5.self_attn.pos_bias_u | [4, 64] | 256 | True\n",
"encoder.encoders.5.self_attn.pos_bias_v | [4, 64] | 256 | True\n",
"encoder.encoders.5.self_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.5.self_attn.linear_q.bias | [256] | 256 | True\n",
"encoder.encoders.5.self_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.5.self_attn.linear_k.bias | [256] | 256 | True\n",
"encoder.encoders.5.self_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.5.self_attn.linear_v.bias | [256] | 256 | True\n",
"encoder.encoders.5.self_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.5.self_attn.linear_out.bias | [256] | 256 | True\n",
"encoder.encoders.5.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.5.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.5.feed_forward.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.5.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.5.feed_forward.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.5.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.5.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.5.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.5.feed_forward_macaron.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.5.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n",
"encoder.encoders.5.conv_module.pointwise_conv1.bias | [512] | 512 | True\n",
"encoder.encoders.5.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n",
"encoder.encoders.5.conv_module.depthwise_conv.bias | [256] | 256 | True\n",
"encoder.encoders.5.conv_module.norm.weight | [256] | 256 | True\n",
"encoder.encoders.5.conv_module.norm.bias | [256] | 256 | True\n",
"encoder.encoders.5.conv_module.norm._mean | [256] | 256 | False\n",
"encoder.encoders.5.conv_module.norm._variance | [256] | 256 | False\n",
"encoder.encoders.5.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n",
"encoder.encoders.5.conv_module.pointwise_conv2.bias | [256] | 256 | True\n",
"encoder.encoders.5.norm_ff.weight | [256] | 256 | True\n",
"encoder.encoders.5.norm_ff.bias | [256] | 256 | True\n",
"encoder.encoders.5.norm_mha.weight | [256] | 256 | True\n",
"encoder.encoders.5.norm_mha.bias | [256] | 256 | True\n",
"encoder.encoders.5.norm_ff_macaron.weight | [256] | 256 | True\n",
"encoder.encoders.5.norm_ff_macaron.bias | [256] | 256 | True\n",
"encoder.encoders.5.norm_conv.weight | [256] | 256 | True\n",
"encoder.encoders.5.norm_conv.bias | [256] | 256 | True\n",
"encoder.encoders.5.norm_final.weight | [256] | 256 | True\n",
"encoder.encoders.5.norm_final.bias | [256] | 256 | True\n",
"encoder.encoders.5.concat_linear.weight | [512, 256] | 131072 | True\n",
"encoder.encoders.5.concat_linear.bias | [256] | 256 | True\n",
"encoder.encoders.6.self_attn.pos_bias_u | [4, 64] | 256 | True\n",
"encoder.encoders.6.self_attn.pos_bias_v | [4, 64] | 256 | True\n",
"encoder.encoders.6.self_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.6.self_attn.linear_q.bias | [256] | 256 | True\n",
"encoder.encoders.6.self_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.6.self_attn.linear_k.bias | [256] | 256 | True\n",
"encoder.encoders.6.self_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.6.self_attn.linear_v.bias | [256] | 256 | True\n",
"encoder.encoders.6.self_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.6.self_attn.linear_out.bias | [256] | 256 | True\n",
"encoder.encoders.6.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.6.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.6.feed_forward.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.6.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.6.feed_forward.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.6.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.6.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.6.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.6.feed_forward_macaron.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.6.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n",
"encoder.encoders.6.conv_module.pointwise_conv1.bias | [512] | 512 | True\n",
"encoder.encoders.6.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n",
"encoder.encoders.6.conv_module.depthwise_conv.bias | [256] | 256 | True\n",
"encoder.encoders.6.conv_module.norm.weight | [256] | 256 | True\n",
"encoder.encoders.6.conv_module.norm.bias | [256] | 256 | True\n",
"encoder.encoders.6.conv_module.norm._mean | [256] | 256 | False\n",
"encoder.encoders.6.conv_module.norm._variance | [256] | 256 | False\n",
"encoder.encoders.6.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n",
"encoder.encoders.6.conv_module.pointwise_conv2.bias | [256] | 256 | True\n",
"encoder.encoders.6.norm_ff.weight | [256] | 256 | True\n",
"encoder.encoders.6.norm_ff.bias | [256] | 256 | True\n",
"encoder.encoders.6.norm_mha.weight | [256] | 256 | True\n",
"encoder.encoders.6.norm_mha.bias | [256] | 256 | True\n",
"encoder.encoders.6.norm_ff_macaron.weight | [256] | 256 | True\n",
"encoder.encoders.6.norm_ff_macaron.bias | [256] | 256 | True\n",
"encoder.encoders.6.norm_conv.weight | [256] | 256 | True\n",
"encoder.encoders.6.norm_conv.bias | [256] | 256 | True\n",
"encoder.encoders.6.norm_final.weight | [256] | 256 | True\n",
"encoder.encoders.6.norm_final.bias | [256] | 256 | True\n",
"encoder.encoders.6.concat_linear.weight | [512, 256] | 131072 | True\n",
"encoder.encoders.6.concat_linear.bias | [256] | 256 | True\n",
"encoder.encoders.7.self_attn.pos_bias_u | [4, 64] | 256 | True\n",
"encoder.encoders.7.self_attn.pos_bias_v | [4, 64] | 256 | True\n",
"encoder.encoders.7.self_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.7.self_attn.linear_q.bias | [256] | 256 | True\n",
"encoder.encoders.7.self_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.7.self_attn.linear_k.bias | [256] | 256 | True\n",
"encoder.encoders.7.self_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.7.self_attn.linear_v.bias | [256] | 256 | True\n",
"encoder.encoders.7.self_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.7.self_attn.linear_out.bias | [256] | 256 | True\n",
"encoder.encoders.7.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.7.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.7.feed_forward.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.7.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.7.feed_forward.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.7.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.7.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.7.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.7.feed_forward_macaron.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.7.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n",
"encoder.encoders.7.conv_module.pointwise_conv1.bias | [512] | 512 | True\n",
"encoder.encoders.7.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n",
"encoder.encoders.7.conv_module.depthwise_conv.bias | [256] | 256 | True\n",
"encoder.encoders.7.conv_module.norm.weight | [256] | 256 | True\n",
"encoder.encoders.7.conv_module.norm.bias | [256] | 256 | True\n",
"encoder.encoders.7.conv_module.norm._mean | [256] | 256 | False\n",
"encoder.encoders.7.conv_module.norm._variance | [256] | 256 | False\n",
"encoder.encoders.7.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n",
"encoder.encoders.7.conv_module.pointwise_conv2.bias | [256] | 256 | True\n",
"encoder.encoders.7.norm_ff.weight | [256] | 256 | True\n",
"encoder.encoders.7.norm_ff.bias | [256] | 256 | True\n",
"encoder.encoders.7.norm_mha.weight | [256] | 256 | True\n",
"encoder.encoders.7.norm_mha.bias | [256] | 256 | True\n",
"encoder.encoders.7.norm_ff_macaron.weight | [256] | 256 | True\n",
"encoder.encoders.7.norm_ff_macaron.bias | [256] | 256 | True\n",
"encoder.encoders.7.norm_conv.weight | [256] | 256 | True\n",
"encoder.encoders.7.norm_conv.bias | [256] | 256 | True\n",
"encoder.encoders.7.norm_final.weight | [256] | 256 | True\n",
"encoder.encoders.7.norm_final.bias | [256] | 256 | True\n",
"encoder.encoders.7.concat_linear.weight | [512, 256] | 131072 | True\n",
"encoder.encoders.7.concat_linear.bias | [256] | 256 | True\n",
"encoder.encoders.8.self_attn.pos_bias_u | [4, 64] | 256 | True\n",
"encoder.encoders.8.self_attn.pos_bias_v | [4, 64] | 256 | True\n",
"encoder.encoders.8.self_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.8.self_attn.linear_q.bias | [256] | 256 | True\n",
"encoder.encoders.8.self_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.8.self_attn.linear_k.bias | [256] | 256 | True\n",
"encoder.encoders.8.self_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.8.self_attn.linear_v.bias | [256] | 256 | True\n",
"encoder.encoders.8.self_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.8.self_attn.linear_out.bias | [256] | 256 | True\n",
"encoder.encoders.8.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.8.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.8.feed_forward.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.8.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.8.feed_forward.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.8.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.8.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.8.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.8.feed_forward_macaron.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.8.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n",
"encoder.encoders.8.conv_module.pointwise_conv1.bias | [512] | 512 | True\n",
"encoder.encoders.8.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n",
"encoder.encoders.8.conv_module.depthwise_conv.bias | [256] | 256 | True\n",
"encoder.encoders.8.conv_module.norm.weight | [256] | 256 | True\n",
"encoder.encoders.8.conv_module.norm.bias | [256] | 256 | True\n",
"encoder.encoders.8.conv_module.norm._mean | [256] | 256 | False\n",
"encoder.encoders.8.conv_module.norm._variance | [256] | 256 | False\n",
"encoder.encoders.8.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n",
"encoder.encoders.8.conv_module.pointwise_conv2.bias | [256] | 256 | True\n",
"encoder.encoders.8.norm_ff.weight | [256] | 256 | True\n",
"encoder.encoders.8.norm_ff.bias | [256] | 256 | True\n",
"encoder.encoders.8.norm_mha.weight | [256] | 256 | True\n",
"encoder.encoders.8.norm_mha.bias | [256] | 256 | True\n",
"encoder.encoders.8.norm_ff_macaron.weight | [256] | 256 | True\n",
"encoder.encoders.8.norm_ff_macaron.bias | [256] | 256 | True\n",
"encoder.encoders.8.norm_conv.weight | [256] | 256 | True\n",
"encoder.encoders.8.norm_conv.bias | [256] | 256 | True\n",
"encoder.encoders.8.norm_final.weight | [256] | 256 | True\n",
"encoder.encoders.8.norm_final.bias | [256] | 256 | True\n",
"encoder.encoders.8.concat_linear.weight | [512, 256] | 131072 | True\n",
"encoder.encoders.8.concat_linear.bias | [256] | 256 | True\n",
"encoder.encoders.9.self_attn.pos_bias_u | [4, 64] | 256 | True\n",
"encoder.encoders.9.self_attn.pos_bias_v | [4, 64] | 256 | True\n",
"encoder.encoders.9.self_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.9.self_attn.linear_q.bias | [256] | 256 | True\n",
"encoder.encoders.9.self_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.9.self_attn.linear_k.bias | [256] | 256 | True\n",
"encoder.encoders.9.self_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.9.self_attn.linear_v.bias | [256] | 256 | True\n",
"encoder.encoders.9.self_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.9.self_attn.linear_out.bias | [256] | 256 | True\n",
"encoder.encoders.9.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.9.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.9.feed_forward.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.9.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.9.feed_forward.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.9.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.9.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.9.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.9.feed_forward_macaron.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.9.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n",
"encoder.encoders.9.conv_module.pointwise_conv1.bias | [512] | 512 | True\n",
"encoder.encoders.9.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n",
"encoder.encoders.9.conv_module.depthwise_conv.bias | [256] | 256 | True\n",
"encoder.encoders.9.conv_module.norm.weight | [256] | 256 | True\n",
"encoder.encoders.9.conv_module.norm.bias | [256] | 256 | True\n",
"encoder.encoders.9.conv_module.norm._mean | [256] | 256 | False\n",
"encoder.encoders.9.conv_module.norm._variance | [256] | 256 | False\n",
"encoder.encoders.9.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n",
"encoder.encoders.9.conv_module.pointwise_conv2.bias | [256] | 256 | True\n",
"encoder.encoders.9.norm_ff.weight | [256] | 256 | True\n",
"encoder.encoders.9.norm_ff.bias | [256] | 256 | True\n",
"encoder.encoders.9.norm_mha.weight | [256] | 256 | True\n",
"encoder.encoders.9.norm_mha.bias | [256] | 256 | True\n",
"encoder.encoders.9.norm_ff_macaron.weight | [256] | 256 | True\n",
"encoder.encoders.9.norm_ff_macaron.bias | [256] | 256 | True\n",
"encoder.encoders.9.norm_conv.weight | [256] | 256 | True\n",
"encoder.encoders.9.norm_conv.bias | [256] | 256 | True\n",
"encoder.encoders.9.norm_final.weight | [256] | 256 | True\n",
"encoder.encoders.9.norm_final.bias | [256] | 256 | True\n",
"encoder.encoders.9.concat_linear.weight | [512, 256] | 131072 | True\n",
"encoder.encoders.9.concat_linear.bias | [256] | 256 | True\n",
"encoder.encoders.10.self_attn.pos_bias_u | [4, 64] | 256 | True\n",
"encoder.encoders.10.self_attn.pos_bias_v | [4, 64] | 256 | True\n",
"encoder.encoders.10.self_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.10.self_attn.linear_q.bias | [256] | 256 | True\n",
"encoder.encoders.10.self_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.10.self_attn.linear_k.bias | [256] | 256 | True\n",
"encoder.encoders.10.self_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.10.self_attn.linear_v.bias | [256] | 256 | True\n",
"encoder.encoders.10.self_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.10.self_attn.linear_out.bias | [256] | 256 | True\n",
"encoder.encoders.10.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.10.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.10.feed_forward.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.10.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.10.feed_forward.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.10.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.10.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.10.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.10.feed_forward_macaron.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.10.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n",
"encoder.encoders.10.conv_module.pointwise_conv1.bias | [512] | 512 | True\n",
"encoder.encoders.10.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n",
"encoder.encoders.10.conv_module.depthwise_conv.bias | [256] | 256 | True\n",
"encoder.encoders.10.conv_module.norm.weight | [256] | 256 | True\n",
"encoder.encoders.10.conv_module.norm.bias | [256] | 256 | True\n",
"encoder.encoders.10.conv_module.norm._mean | [256] | 256 | False\n",
"encoder.encoders.10.conv_module.norm._variance | [256] | 256 | False\n",
"encoder.encoders.10.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n",
"encoder.encoders.10.conv_module.pointwise_conv2.bias | [256] | 256 | True\n",
"encoder.encoders.10.norm_ff.weight | [256] | 256 | True\n",
"encoder.encoders.10.norm_ff.bias | [256] | 256 | True\n",
"encoder.encoders.10.norm_mha.weight | [256] | 256 | True\n",
"encoder.encoders.10.norm_mha.bias | [256] | 256 | True\n",
"encoder.encoders.10.norm_ff_macaron.weight | [256] | 256 | True\n",
"encoder.encoders.10.norm_ff_macaron.bias | [256] | 256 | True\n",
"encoder.encoders.10.norm_conv.weight | [256] | 256 | True\n",
"encoder.encoders.10.norm_conv.bias | [256] | 256 | True\n",
"encoder.encoders.10.norm_final.weight | [256] | 256 | True\n",
"encoder.encoders.10.norm_final.bias | [256] | 256 | True\n",
"encoder.encoders.10.concat_linear.weight | [512, 256] | 131072 | True\n",
"encoder.encoders.10.concat_linear.bias | [256] | 256 | True\n",
"encoder.encoders.11.self_attn.pos_bias_u | [4, 64] | 256 | True\n",
"encoder.encoders.11.self_attn.pos_bias_v | [4, 64] | 256 | True\n",
"encoder.encoders.11.self_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.11.self_attn.linear_q.bias | [256] | 256 | True\n",
"encoder.encoders.11.self_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.11.self_attn.linear_k.bias | [256] | 256 | True\n",
"encoder.encoders.11.self_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.11.self_attn.linear_v.bias | [256] | 256 | True\n",
"encoder.encoders.11.self_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.11.self_attn.linear_out.bias | [256] | 256 | True\n",
"encoder.encoders.11.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n",
"encoder.encoders.11.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.11.feed_forward.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.11.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.11.feed_forward.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.11.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n",
"encoder.encoders.11.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n",
"encoder.encoders.11.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n",
"encoder.encoders.11.feed_forward_macaron.w_2.bias | [256] | 256 | True\n",
"encoder.encoders.11.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n",
"encoder.encoders.11.conv_module.pointwise_conv1.bias | [512] | 512 | True\n",
"encoder.encoders.11.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n",
"encoder.encoders.11.conv_module.depthwise_conv.bias | [256] | 256 | True\n",
"encoder.encoders.11.conv_module.norm.weight | [256] | 256 | True\n",
"encoder.encoders.11.conv_module.norm.bias | [256] | 256 | True\n",
"encoder.encoders.11.conv_module.norm._mean | [256] | 256 | False\n",
"encoder.encoders.11.conv_module.norm._variance | [256] | 256 | False\n",
"encoder.encoders.11.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n",
"encoder.encoders.11.conv_module.pointwise_conv2.bias | [256] | 256 | True\n",
"encoder.encoders.11.norm_ff.weight | [256] | 256 | True\n",
"encoder.encoders.11.norm_ff.bias | [256] | 256 | True\n",
"encoder.encoders.11.norm_mha.weight | [256] | 256 | True\n",
"encoder.encoders.11.norm_mha.bias | [256] | 256 | True\n",
"encoder.encoders.11.norm_ff_macaron.weight | [256] | 256 | True\n",
"encoder.encoders.11.norm_ff_macaron.bias | [256] | 256 | True\n",
"encoder.encoders.11.norm_conv.weight | [256] | 256 | True\n",
"encoder.encoders.11.norm_conv.bias | [256] | 256 | True\n",
"encoder.encoders.11.norm_final.weight | [256] | 256 | True\n",
"encoder.encoders.11.norm_final.bias | [256] | 256 | True\n",
"encoder.encoders.11.concat_linear.weight | [512, 256] | 131072 | True\n",
"encoder.encoders.11.concat_linear.bias | [256] | 256 | True\n",
"decoder.embed.0.weight | [4223, 256] | 1081088 | True\n",
"decoder.after_norm.weight | [256] | 256 | True\n",
"decoder.after_norm.bias | [256] | 256 | True\n",
"decoder.output_layer.weight | [256, 4223] | 1081088 | True\n",
"decoder.output_layer.bias | [4223] | 4223 | True\n",
"decoder.decoders.0.self_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.0.self_attn.linear_q.bias | [256] | 256 | True\n",
"decoder.decoders.0.self_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.0.self_attn.linear_k.bias | [256] | 256 | True\n",
"decoder.decoders.0.self_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.0.self_attn.linear_v.bias | [256] | 256 | True\n",
"decoder.decoders.0.self_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.0.self_attn.linear_out.bias | [256] | 256 | True\n",
"decoder.decoders.0.src_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.0.src_attn.linear_q.bias | [256] | 256 | True\n",
"decoder.decoders.0.src_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.0.src_attn.linear_k.bias | [256] | 256 | True\n",
"decoder.decoders.0.src_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.0.src_attn.linear_v.bias | [256] | 256 | True\n",
"decoder.decoders.0.src_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.0.src_attn.linear_out.bias | [256] | 256 | True\n",
"decoder.decoders.0.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n",
"decoder.decoders.0.feed_forward.w_1.bias | [2048] | 2048 | True\n",
"decoder.decoders.0.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n",
"decoder.decoders.0.feed_forward.w_2.bias | [256] | 256 | True\n",
"decoder.decoders.0.norm1.weight | [256] | 256 | True\n",
"decoder.decoders.0.norm1.bias | [256] | 256 | True\n",
"decoder.decoders.0.norm2.weight | [256] | 256 | True\n",
"decoder.decoders.0.norm2.bias | [256] | 256 | True\n",
"decoder.decoders.0.norm3.weight | [256] | 256 | True\n",
"decoder.decoders.0.norm3.bias | [256] | 256 | True\n",
"decoder.decoders.0.concat_linear1.weight | [512, 256] | 131072 | True\n",
"decoder.decoders.0.concat_linear1.bias | [256] | 256 | True\n",
"decoder.decoders.0.concat_linear2.weight | [512, 256] | 131072 | True\n",
"decoder.decoders.0.concat_linear2.bias | [256] | 256 | True\n",
"decoder.decoders.1.self_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.1.self_attn.linear_q.bias | [256] | 256 | True\n",
"decoder.decoders.1.self_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.1.self_attn.linear_k.bias | [256] | 256 | True\n",
"decoder.decoders.1.self_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.1.self_attn.linear_v.bias | [256] | 256 | True\n",
"decoder.decoders.1.self_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.1.self_attn.linear_out.bias | [256] | 256 | True\n",
"decoder.decoders.1.src_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.1.src_attn.linear_q.bias | [256] | 256 | True\n",
"decoder.decoders.1.src_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.1.src_attn.linear_k.bias | [256] | 256 | True\n",
"decoder.decoders.1.src_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.1.src_attn.linear_v.bias | [256] | 256 | True\n",
"decoder.decoders.1.src_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.1.src_attn.linear_out.bias | [256] | 256 | True\n",
"decoder.decoders.1.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n",
"decoder.decoders.1.feed_forward.w_1.bias | [2048] | 2048 | True\n",
"decoder.decoders.1.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n",
"decoder.decoders.1.feed_forward.w_2.bias | [256] | 256 | True\n",
"decoder.decoders.1.norm1.weight | [256] | 256 | True\n",
"decoder.decoders.1.norm1.bias | [256] | 256 | True\n",
"decoder.decoders.1.norm2.weight | [256] | 256 | True\n",
"decoder.decoders.1.norm2.bias | [256] | 256 | True\n",
"decoder.decoders.1.norm3.weight | [256] | 256 | True\n",
"decoder.decoders.1.norm3.bias | [256] | 256 | True\n",
"decoder.decoders.1.concat_linear1.weight | [512, 256] | 131072 | True\n",
"decoder.decoders.1.concat_linear1.bias | [256] | 256 | True\n",
"decoder.decoders.1.concat_linear2.weight | [512, 256] | 131072 | True\n",
"decoder.decoders.1.concat_linear2.bias | [256] | 256 | True\n",
"decoder.decoders.2.self_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.2.self_attn.linear_q.bias | [256] | 256 | True\n",
"decoder.decoders.2.self_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.2.self_attn.linear_k.bias | [256] | 256 | True\n",
"decoder.decoders.2.self_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.2.self_attn.linear_v.bias | [256] | 256 | True\n",
"decoder.decoders.2.self_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.2.self_attn.linear_out.bias | [256] | 256 | True\n",
"decoder.decoders.2.src_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.2.src_attn.linear_q.bias | [256] | 256 | True\n",
"decoder.decoders.2.src_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.2.src_attn.linear_k.bias | [256] | 256 | True\n",
"decoder.decoders.2.src_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.2.src_attn.linear_v.bias | [256] | 256 | True\n",
"decoder.decoders.2.src_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.2.src_attn.linear_out.bias | [256] | 256 | True\n",
"decoder.decoders.2.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n",
"decoder.decoders.2.feed_forward.w_1.bias | [2048] | 2048 | True\n",
"decoder.decoders.2.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n",
"decoder.decoders.2.feed_forward.w_2.bias | [256] | 256 | True\n",
"decoder.decoders.2.norm1.weight | [256] | 256 | True\n",
"decoder.decoders.2.norm1.bias | [256] | 256 | True\n",
"decoder.decoders.2.norm2.weight | [256] | 256 | True\n",
"decoder.decoders.2.norm2.bias | [256] | 256 | True\n",
"decoder.decoders.2.norm3.weight | [256] | 256 | True\n",
"decoder.decoders.2.norm3.bias | [256] | 256 | True\n",
"decoder.decoders.2.concat_linear1.weight | [512, 256] | 131072 | True\n",
"decoder.decoders.2.concat_linear1.bias | [256] | 256 | True\n",
"decoder.decoders.2.concat_linear2.weight | [512, 256] | 131072 | True\n",
"decoder.decoders.2.concat_linear2.bias | [256] | 256 | True\n",
"decoder.decoders.3.self_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.3.self_attn.linear_q.bias | [256] | 256 | True\n",
"decoder.decoders.3.self_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.3.self_attn.linear_k.bias | [256] | 256 | True\n",
"decoder.decoders.3.self_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.3.self_attn.linear_v.bias | [256] | 256 | True\n",
"decoder.decoders.3.self_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.3.self_attn.linear_out.bias | [256] | 256 | True\n",
"decoder.decoders.3.src_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.3.src_attn.linear_q.bias | [256] | 256 | True\n",
"decoder.decoders.3.src_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.3.src_attn.linear_k.bias | [256] | 256 | True\n",
"decoder.decoders.3.src_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.3.src_attn.linear_v.bias | [256] | 256 | True\n",
"decoder.decoders.3.src_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.3.src_attn.linear_out.bias | [256] | 256 | True\n",
"decoder.decoders.3.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n",
"decoder.decoders.3.feed_forward.w_1.bias | [2048] | 2048 | True\n",
"decoder.decoders.3.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n",
"decoder.decoders.3.feed_forward.w_2.bias | [256] | 256 | True\n",
"decoder.decoders.3.norm1.weight | [256] | 256 | True\n",
"decoder.decoders.3.norm1.bias | [256] | 256 | True\n",
"decoder.decoders.3.norm2.weight | [256] | 256 | True\n",
"decoder.decoders.3.norm2.bias | [256] | 256 | True\n",
"decoder.decoders.3.norm3.weight | [256] | 256 | True\n",
"decoder.decoders.3.norm3.bias | [256] | 256 | True\n",
"decoder.decoders.3.concat_linear1.weight | [512, 256] | 131072 | True\n",
"decoder.decoders.3.concat_linear1.bias | [256] | 256 | True\n",
"decoder.decoders.3.concat_linear2.weight | [512, 256] | 131072 | True\n",
"decoder.decoders.3.concat_linear2.bias | [256] | 256 | True\n",
"decoder.decoders.4.self_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.4.self_attn.linear_q.bias | [256] | 256 | True\n",
"decoder.decoders.4.self_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.4.self_attn.linear_k.bias | [256] | 256 | True\n",
"decoder.decoders.4.self_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.4.self_attn.linear_v.bias | [256] | 256 | True\n",
"decoder.decoders.4.self_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.4.self_attn.linear_out.bias | [256] | 256 | True\n",
"decoder.decoders.4.src_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.4.src_attn.linear_q.bias | [256] | 256 | True\n",
"decoder.decoders.4.src_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.4.src_attn.linear_k.bias | [256] | 256 | True\n",
"decoder.decoders.4.src_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.4.src_attn.linear_v.bias | [256] | 256 | True\n",
"decoder.decoders.4.src_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.4.src_attn.linear_out.bias | [256] | 256 | True\n",
"decoder.decoders.4.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n",
"decoder.decoders.4.feed_forward.w_1.bias | [2048] | 2048 | True\n",
"decoder.decoders.4.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n",
"decoder.decoders.4.feed_forward.w_2.bias | [256] | 256 | True\n",
"decoder.decoders.4.norm1.weight | [256] | 256 | True\n",
"decoder.decoders.4.norm1.bias | [256] | 256 | True\n",
"decoder.decoders.4.norm2.weight | [256] | 256 | True\n",
"decoder.decoders.4.norm2.bias | [256] | 256 | True\n",
"decoder.decoders.4.norm3.weight | [256] | 256 | True\n",
"decoder.decoders.4.norm3.bias | [256] | 256 | True\n",
"decoder.decoders.4.concat_linear1.weight | [512, 256] | 131072 | True\n",
"decoder.decoders.4.concat_linear1.bias | [256] | 256 | True\n",
"decoder.decoders.4.concat_linear2.weight | [512, 256] | 131072 | True\n",
"decoder.decoders.4.concat_linear2.bias | [256] | 256 | True\n",
"decoder.decoders.5.self_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.5.self_attn.linear_q.bias | [256] | 256 | True\n",
"decoder.decoders.5.self_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.5.self_attn.linear_k.bias | [256] | 256 | True\n",
"decoder.decoders.5.self_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.5.self_attn.linear_v.bias | [256] | 256 | True\n",
"decoder.decoders.5.self_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.5.self_attn.linear_out.bias | [256] | 256 | True\n",
"decoder.decoders.5.src_attn.linear_q.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.5.src_attn.linear_q.bias | [256] | 256 | True\n",
"decoder.decoders.5.src_attn.linear_k.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.5.src_attn.linear_k.bias | [256] | 256 | True\n",
"decoder.decoders.5.src_attn.linear_v.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.5.src_attn.linear_v.bias | [256] | 256 | True\n",
"decoder.decoders.5.src_attn.linear_out.weight | [256, 256] | 65536 | True\n",
"decoder.decoders.5.src_attn.linear_out.bias | [256] | 256 | True\n",
"decoder.decoders.5.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n",
"decoder.decoders.5.feed_forward.w_1.bias | [2048] | 2048 | True\n",
"decoder.decoders.5.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n",
"decoder.decoders.5.feed_forward.w_2.bias | [256] | 256 | True\n",
"decoder.decoders.5.norm1.weight | [256] | 256 | True\n",
"decoder.decoders.5.norm1.bias | [256] | 256 | True\n",
"decoder.decoders.5.norm2.weight | [256] | 256 | True\n",
"decoder.decoders.5.norm2.bias | [256] | 256 | True\n",
"decoder.decoders.5.norm3.weight | [256] | 256 | True\n",
"decoder.decoders.5.norm3.bias | [256] | 256 | True\n",
"decoder.decoders.5.concat_linear1.weight | [512, 256] | 131072 | True\n",
"decoder.decoders.5.concat_linear1.bias | [256] | 256 | True\n",
"decoder.decoders.5.concat_linear2.weight | [512, 256] | 131072 | True\n",
"decoder.decoders.5.concat_linear2.bias | [256] | 256 | True\n",
"ctc.ctc_lo.weight | [256, 4223] | 1081088 | True\n",
"ctc.ctc_lo.bias | [4223] | 4223 | True\n",
"Total parameters: 687.0, 49347582.0 elements.\n"
]
}
],
"source": [
"conf_str='examples/aishell/s1/conf/conformer.yaml'\n",
"cfg = CN().load_cfg(open(conf_str))\n",
"cfg.model.input_dim = 80\n",
"cfg.model.output_dim = 4223\n",
"cfg.model.cmvn_file = \"/workspace/wenet/examples/aishell/s0/raw_wav/train/global_cmvn\"\n",
"cfg.model.cmvn_file_type = 'json'\n",
"cfg.freeze()\n",
"\n",
"model = U2Model(cfg.model)\n",
"print_params(model)\n"
]
},
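{
"cell_type": "markdown",
"id": "cmvn-sketch-note",
"metadata": {},
"source": [
"A minimal sketch (an assumption, not the actual DeepSpeech-2.x implementation) of what the `encoder.global_cmvn` layer listed below does with its `mean`/`istd` stats: each 80-dim fbank frame is normalized as `(x - mean) * istd`. The helper name `apply_global_cmvn` and the dummy inputs are hypothetical."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cmvn-sketch-code",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"def apply_global_cmvn(feats, mean, istd):\n",
"    \"\"\"Normalize [T, 80] fbank frames with precomputed [80] global CMVN stats.\"\"\"\n",
"    return (feats - mean) * istd\n",
"\n",
"frames = np.random.rand(10, 80).astype(\"float32\")  # dummy fbank frames\n",
"mean = np.zeros(80, dtype=\"float32\")  # placeholder stats; real values come from the global_cmvn file\n",
"istd = np.ones(80, dtype=\"float32\")\n",
"print(apply_global_cmvn(frames, mean, istd).shape)  # (10, 80)"
]
},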
{
"cell_type": "code",
"execution_count": 4,
"id": "reserved-nightlife",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"encoder.global_cmvn.mean | [80] | 80\n",
"encoder.global_cmvn.istd | [80] | 80\n",
"encoder.embed.conv.0.weight | [256, 1, 3, 3] | 2304\n",
"encoder.embed.conv.0.bias | [256] | 256\n",
"encoder.embed.conv.2.weight | [256, 256, 3, 3] | 589824\n",
"encoder.embed.conv.2.bias | [256] | 256\n",
"encoder.embed.linear.weight | [4864, 256] | 1245184\n",
"encoder.embed.linear.bias | [256] | 256\n",
"encoder.after_norm.weight | [256] | 256\n",
"encoder.after_norm.bias | [256] | 256\n",
"encoder.encoders.0.self_attn.pos_bias_u | [4, 64] | 256\n",
"encoder.encoders.0.self_attn.pos_bias_v | [4, 64] | 256\n",
"encoder.encoders.0.self_attn.linear_q.weight | [256, 256] | 65536\n",
"encoder.encoders.0.self_attn.linear_q.bias | [256] | 256\n",
"encoder.encoders.0.self_attn.linear_k.weight | [256, 256] | 65536\n",
"encoder.encoders.0.self_attn.linear_k.bias | [256] | 256\n",
"encoder.encoders.0.self_attn.linear_v.weight | [256, 256] | 65536\n",
"encoder.encoders.0.self_attn.linear_v.bias | [256] | 256\n",
"encoder.encoders.0.self_attn.linear_out.weight | [256, 256] | 65536\n",
"encoder.encoders.0.self_attn.linear_out.bias | [256] | 256\n",
"encoder.encoders.0.self_attn.linear_pos.weight | [256, 256] | 65536\n",
"encoder.encoders.0.feed_forward.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.0.feed_forward.w_1.bias | [2048] | 2048\n",
"encoder.encoders.0.feed_forward.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.0.feed_forward.w_2.bias | [256] | 256\n",
"encoder.encoders.0.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.0.feed_forward_macaron.w_1.bias | [2048] | 2048\n",
"encoder.encoders.0.feed_forward_macaron.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.0.feed_forward_macaron.w_2.bias | [256] | 256\n",
"encoder.encoders.0.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072\n",
"encoder.encoders.0.conv_module.pointwise_conv1.bias | [512] | 512\n",
"encoder.encoders.0.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n",
"encoder.encoders.0.conv_module.depthwise_conv.bias | [256] | 256\n",
"encoder.encoders.0.conv_module.norm.weight | [256] | 256\n",
"encoder.encoders.0.conv_module.norm.bias | [256] | 256\n",
"encoder.encoders.0.conv_module.norm._mean | [256] | 256\n",
"encoder.encoders.0.conv_module.norm._variance | [256] | 256\n",
"encoder.encoders.0.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n",
"encoder.encoders.0.conv_module.pointwise_conv2.bias | [256] | 256\n",
"encoder.encoders.0.norm_ff.weight | [256] | 256\n",
"encoder.encoders.0.norm_ff.bias | [256] | 256\n",
"encoder.encoders.0.norm_mha.weight | [256] | 256\n",
"encoder.encoders.0.norm_mha.bias | [256] | 256\n",
"encoder.encoders.0.norm_ff_macaron.weight | [256] | 256\n",
"encoder.encoders.0.norm_ff_macaron.bias | [256] | 256\n",
"encoder.encoders.0.norm_conv.weight | [256] | 256\n",
"encoder.encoders.0.norm_conv.bias | [256] | 256\n",
"encoder.encoders.0.norm_final.weight | [256] | 256\n",
"encoder.encoders.0.norm_final.bias | [256] | 256\n",
"encoder.encoders.0.concat_linear.weight | [512, 256] | 131072\n",
"encoder.encoders.0.concat_linear.bias | [256] | 256\n",
"encoder.encoders.1.self_attn.pos_bias_u | [4, 64] | 256\n",
"encoder.encoders.1.self_attn.pos_bias_v | [4, 64] | 256\n",
"encoder.encoders.1.self_attn.linear_q.weight | [256, 256] | 65536\n",
"encoder.encoders.1.self_attn.linear_q.bias | [256] | 256\n",
"encoder.encoders.1.self_attn.linear_k.weight | [256, 256] | 65536\n",
"encoder.encoders.1.self_attn.linear_k.bias | [256] | 256\n",
"encoder.encoders.1.self_attn.linear_v.weight | [256, 256] | 65536\n",
"encoder.encoders.1.self_attn.linear_v.bias | [256] | 256\n",
"encoder.encoders.1.self_attn.linear_out.weight | [256, 256] | 65536\n",
"encoder.encoders.1.self_attn.linear_out.bias | [256] | 256\n",
"encoder.encoders.1.self_attn.linear_pos.weight | [256, 256] | 65536\n",
"encoder.encoders.1.feed_forward.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.1.feed_forward.w_1.bias | [2048] | 2048\n",
"encoder.encoders.1.feed_forward.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.1.feed_forward.w_2.bias | [256] | 256\n",
"encoder.encoders.1.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.1.feed_forward_macaron.w_1.bias | [2048] | 2048\n",
"encoder.encoders.1.feed_forward_macaron.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.1.feed_forward_macaron.w_2.bias | [256] | 256\n",
"encoder.encoders.1.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072\n",
"encoder.encoders.1.conv_module.pointwise_conv1.bias | [512] | 512\n",
"encoder.encoders.1.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n",
"encoder.encoders.1.conv_module.depthwise_conv.bias | [256] | 256\n",
"encoder.encoders.1.conv_module.norm.weight | [256] | 256\n",
"encoder.encoders.1.conv_module.norm.bias | [256] | 256\n",
"encoder.encoders.1.conv_module.norm._mean | [256] | 256\n",
"encoder.encoders.1.conv_module.norm._variance | [256] | 256\n",
"encoder.encoders.1.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n",
"encoder.encoders.1.conv_module.pointwise_conv2.bias | [256] | 256\n",
"encoder.encoders.1.norm_ff.weight | [256] | 256\n",
"encoder.encoders.1.norm_ff.bias | [256] | 256\n",
"encoder.encoders.1.norm_mha.weight | [256] | 256\n",
"encoder.encoders.1.norm_mha.bias | [256] | 256\n",
"encoder.encoders.1.norm_ff_macaron.weight | [256] | 256\n",
"encoder.encoders.1.norm_ff_macaron.bias | [256] | 256\n",
"encoder.encoders.1.norm_conv.weight | [256] | 256\n",
"encoder.encoders.1.norm_conv.bias | [256] | 256\n",
"encoder.encoders.1.norm_final.weight | [256] | 256\n",
"encoder.encoders.1.norm_final.bias | [256] | 256\n",
"encoder.encoders.1.concat_linear.weight | [512, 256] | 131072\n",
"encoder.encoders.1.concat_linear.bias | [256] | 256\n",
"encoder.encoders.2.self_attn.pos_bias_u | [4, 64] | 256\n",
"encoder.encoders.2.self_attn.pos_bias_v | [4, 64] | 256\n",
"encoder.encoders.2.self_attn.linear_q.weight | [256, 256] | 65536\n",
"encoder.encoders.2.self_attn.linear_q.bias | [256] | 256\n",
"encoder.encoders.2.self_attn.linear_k.weight | [256, 256] | 65536\n",
"encoder.encoders.2.self_attn.linear_k.bias | [256] | 256\n",
"encoder.encoders.2.self_attn.linear_v.weight | [256, 256] | 65536\n",
"encoder.encoders.2.self_attn.linear_v.bias | [256] | 256\n",
"encoder.encoders.2.self_attn.linear_out.weight | [256, 256] | 65536\n",
"encoder.encoders.2.self_attn.linear_out.bias | [256] | 256\n",
"encoder.encoders.2.self_attn.linear_pos.weight | [256, 256] | 65536\n",
"encoder.encoders.2.feed_forward.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.2.feed_forward.w_1.bias | [2048] | 2048\n",
"encoder.encoders.2.feed_forward.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.2.feed_forward.w_2.bias | [256] | 256\n",
"encoder.encoders.2.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.2.feed_forward_macaron.w_1.bias | [2048] | 2048\n",
"encoder.encoders.2.feed_forward_macaron.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.2.feed_forward_macaron.w_2.bias | [256] | 256\n",
"encoder.encoders.2.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072\n",
"encoder.encoders.2.conv_module.pointwise_conv1.bias | [512] | 512\n",
"encoder.encoders.2.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n",
"encoder.encoders.2.conv_module.depthwise_conv.bias | [256] | 256\n",
"encoder.encoders.2.conv_module.norm.weight | [256] | 256\n",
"encoder.encoders.2.conv_module.norm.bias | [256] | 256\n",
"encoder.encoders.2.conv_module.norm._mean | [256] | 256\n",
"encoder.encoders.2.conv_module.norm._variance | [256] | 256\n",
"encoder.encoders.2.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n",
"encoder.encoders.2.conv_module.pointwise_conv2.bias | [256] | 256\n",
"encoder.encoders.2.norm_ff.weight | [256] | 256\n",
"encoder.encoders.2.norm_ff.bias | [256] | 256\n",
"encoder.encoders.2.norm_mha.weight | [256] | 256\n",
"encoder.encoders.2.norm_mha.bias | [256] | 256\n",
"encoder.encoders.2.norm_ff_macaron.weight | [256] | 256\n",
"encoder.encoders.2.norm_ff_macaron.bias | [256] | 256\n",
"encoder.encoders.2.norm_conv.weight | [256] | 256\n",
"encoder.encoders.2.norm_conv.bias | [256] | 256\n",
"encoder.encoders.2.norm_final.weight | [256] | 256\n",
"encoder.encoders.2.norm_final.bias | [256] | 256\n",
"encoder.encoders.2.concat_linear.weight | [512, 256] | 131072\n",
"encoder.encoders.2.concat_linear.bias | [256] | 256\n",
"encoder.encoders.3.self_attn.pos_bias_u | [4, 64] | 256\n",
"encoder.encoders.3.self_attn.pos_bias_v | [4, 64] | 256\n",
"encoder.encoders.3.self_attn.linear_q.weight | [256, 256] | 65536\n",
"encoder.encoders.3.self_attn.linear_q.bias | [256] | 256\n",
"encoder.encoders.3.self_attn.linear_k.weight | [256, 256] | 65536\n",
"encoder.encoders.3.self_attn.linear_k.bias | [256] | 256\n",
"encoder.encoders.3.self_attn.linear_v.weight | [256, 256] | 65536\n",
"encoder.encoders.3.self_attn.linear_v.bias | [256] | 256\n",
"encoder.encoders.3.self_attn.linear_out.weight | [256, 256] | 65536\n",
"encoder.encoders.3.self_attn.linear_out.bias | [256] | 256\n",
"encoder.encoders.3.self_attn.linear_pos.weight | [256, 256] | 65536\n",
"encoder.encoders.3.feed_forward.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.3.feed_forward.w_1.bias | [2048] | 2048\n",
"encoder.encoders.3.feed_forward.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.3.feed_forward.w_2.bias | [256] | 256\n",
"encoder.encoders.3.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.3.feed_forward_macaron.w_1.bias | [2048] | 2048\n",
"encoder.encoders.3.feed_forward_macaron.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.3.feed_forward_macaron.w_2.bias | [256] | 256\n",
"encoder.encoders.3.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072\n",
"encoder.encoders.3.conv_module.pointwise_conv1.bias | [512] | 512\n",
"encoder.encoders.3.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n",
"encoder.encoders.3.conv_module.depthwise_conv.bias | [256] | 256\n",
"encoder.encoders.3.conv_module.norm.weight | [256] | 256\n",
"encoder.encoders.3.conv_module.norm.bias | [256] | 256\n",
"encoder.encoders.3.conv_module.norm._mean | [256] | 256\n",
"encoder.encoders.3.conv_module.norm._variance | [256] | 256\n",
"encoder.encoders.3.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n",
"encoder.encoders.3.conv_module.pointwise_conv2.bias | [256] | 256\n",
"encoder.encoders.3.norm_ff.weight | [256] | 256\n",
"encoder.encoders.3.norm_ff.bias | [256] | 256\n",
"encoder.encoders.3.norm_mha.weight | [256] | 256\n",
"encoder.encoders.3.norm_mha.bias | [256] | 256\n",
"encoder.encoders.3.norm_ff_macaron.weight | [256] | 256\n",
"encoder.encoders.3.norm_ff_macaron.bias | [256] | 256\n",
"encoder.encoders.3.norm_conv.weight | [256] | 256\n",
"encoder.encoders.3.norm_conv.bias | [256] | 256\n",
"encoder.encoders.3.norm_final.weight | [256] | 256\n",
"encoder.encoders.3.norm_final.bias | [256] | 256\n",
"encoder.encoders.3.concat_linear.weight | [512, 256] | 131072\n",
"encoder.encoders.3.concat_linear.bias | [256] | 256\n",
"encoder.encoders.4.self_attn.pos_bias_u | [4, 64] | 256\n",
"encoder.encoders.4.self_attn.pos_bias_v | [4, 64] | 256\n",
"encoder.encoders.4.self_attn.linear_q.weight | [256, 256] | 65536\n",
"encoder.encoders.4.self_attn.linear_q.bias | [256] | 256\n",
"encoder.encoders.4.self_attn.linear_k.weight | [256, 256] | 65536\n",
"encoder.encoders.4.self_attn.linear_k.bias | [256] | 256\n",
"encoder.encoders.4.self_attn.linear_v.weight | [256, 256] | 65536\n",
"encoder.encoders.4.self_attn.linear_v.bias | [256] | 256\n",
"encoder.encoders.4.self_attn.linear_out.weight | [256, 256] | 65536\n",
"encoder.encoders.4.self_attn.linear_out.bias | [256] | 256\n",
"encoder.encoders.4.self_attn.linear_pos.weight | [256, 256] | 65536\n",
"encoder.encoders.4.feed_forward.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.4.feed_forward.w_1.bias | [2048] | 2048\n",
"encoder.encoders.4.feed_forward.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.4.feed_forward.w_2.bias | [256] | 256\n",
"encoder.encoders.4.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.4.feed_forward_macaron.w_1.bias | [2048] | 2048\n",
"encoder.encoders.4.feed_forward_macaron.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.4.feed_forward_macaron.w_2.bias | [256] | 256\n",
"encoder.encoders.4.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072\n",
"encoder.encoders.4.conv_module.pointwise_conv1.bias | [512] | 512\n",
"encoder.encoders.4.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n",
"encoder.encoders.4.conv_module.depthwise_conv.bias | [256] | 256\n",
"encoder.encoders.4.conv_module.norm.weight | [256] | 256\n",
"encoder.encoders.4.conv_module.norm.bias | [256] | 256\n",
"encoder.encoders.4.conv_module.norm._mean | [256] | 256\n",
"encoder.encoders.4.conv_module.norm._variance | [256] | 256\n",
"encoder.encoders.4.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n",
"encoder.encoders.4.conv_module.pointwise_conv2.bias | [256] | 256\n",
"encoder.encoders.4.norm_ff.weight | [256] | 256\n",
"encoder.encoders.4.norm_ff.bias | [256] | 256\n",
"encoder.encoders.4.norm_mha.weight | [256] | 256\n",
"encoder.encoders.4.norm_mha.bias | [256] | 256\n",
"encoder.encoders.4.norm_ff_macaron.weight | [256] | 256\n",
"encoder.encoders.4.norm_ff_macaron.bias | [256] | 256\n",
"encoder.encoders.4.norm_conv.weight | [256] | 256\n",
"encoder.encoders.4.norm_conv.bias | [256] | 256\n",
"encoder.encoders.4.norm_final.weight | [256] | 256\n",
"encoder.encoders.4.norm_final.bias | [256] | 256\n",
"encoder.encoders.4.concat_linear.weight | [512, 256] | 131072\n",
"encoder.encoders.4.concat_linear.bias | [256] | 256\n",
"encoder.encoders.5.self_attn.pos_bias_u | [4, 64] | 256\n",
"encoder.encoders.5.self_attn.pos_bias_v | [4, 64] | 256\n",
"encoder.encoders.5.self_attn.linear_q.weight | [256, 256] | 65536\n",
"encoder.encoders.5.self_attn.linear_q.bias | [256] | 256\n",
"encoder.encoders.5.self_attn.linear_k.weight | [256, 256] | 65536\n",
"encoder.encoders.5.self_attn.linear_k.bias | [256] | 256\n",
"encoder.encoders.5.self_attn.linear_v.weight | [256, 256] | 65536\n",
"encoder.encoders.5.self_attn.linear_v.bias | [256] | 256\n",
"encoder.encoders.5.self_attn.linear_out.weight | [256, 256] | 65536\n",
"encoder.encoders.5.self_attn.linear_out.bias | [256] | 256\n",
"encoder.encoders.5.self_attn.linear_pos.weight | [256, 256] | 65536\n",
"encoder.encoders.5.feed_forward.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.5.feed_forward.w_1.bias | [2048] | 2048\n",
"encoder.encoders.5.feed_forward.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.5.feed_forward.w_2.bias | [256] | 256\n",
"encoder.encoders.5.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.5.feed_forward_macaron.w_1.bias | [2048] | 2048\n",
"encoder.encoders.5.feed_forward_macaron.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.5.feed_forward_macaron.w_2.bias | [256] | 256\n",
"encoder.encoders.5.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072\n",
"encoder.encoders.5.conv_module.pointwise_conv1.bias | [512] | 512\n",
"encoder.encoders.5.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n",
"encoder.encoders.5.conv_module.depthwise_conv.bias | [256] | 256\n",
"encoder.encoders.5.conv_module.norm.weight | [256] | 256\n",
"encoder.encoders.5.conv_module.norm.bias | [256] | 256\n",
"encoder.encoders.5.conv_module.norm._mean | [256] | 256\n",
"encoder.encoders.5.conv_module.norm._variance | [256] | 256\n",
"encoder.encoders.5.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n",
"encoder.encoders.5.conv_module.pointwise_conv2.bias | [256] | 256\n",
"encoder.encoders.5.norm_ff.weight | [256] | 256\n",
"encoder.encoders.5.norm_ff.bias | [256] | 256\n",
"encoder.encoders.5.norm_mha.weight | [256] | 256\n",
"encoder.encoders.5.norm_mha.bias | [256] | 256\n",
"encoder.encoders.5.norm_ff_macaron.weight | [256] | 256\n",
"encoder.encoders.5.norm_ff_macaron.bias | [256] | 256\n",
"encoder.encoders.5.norm_conv.weight | [256] | 256\n",
"encoder.encoders.5.norm_conv.bias | [256] | 256\n",
"encoder.encoders.5.norm_final.weight | [256] | 256\n",
"encoder.encoders.5.norm_final.bias | [256] | 256\n",
"encoder.encoders.5.concat_linear.weight | [512, 256] | 131072\n",
"encoder.encoders.5.concat_linear.bias | [256] | 256\n",
"encoder.encoders.6.self_attn.pos_bias_u | [4, 64] | 256\n",
"encoder.encoders.6.self_attn.pos_bias_v | [4, 64] | 256\n",
"encoder.encoders.6.self_attn.linear_q.weight | [256, 256] | 65536\n",
"encoder.encoders.6.self_attn.linear_q.bias | [256] | 256\n",
"encoder.encoders.6.self_attn.linear_k.weight | [256, 256] | 65536\n",
"encoder.encoders.6.self_attn.linear_k.bias | [256] | 256\n",
"encoder.encoders.6.self_attn.linear_v.weight | [256, 256] | 65536\n",
"encoder.encoders.6.self_attn.linear_v.bias | [256] | 256\n",
"encoder.encoders.6.self_attn.linear_out.weight | [256, 256] | 65536\n",
"encoder.encoders.6.self_attn.linear_out.bias | [256] | 256\n",
"encoder.encoders.6.self_attn.linear_pos.weight | [256, 256] | 65536\n",
"encoder.encoders.6.feed_forward.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.6.feed_forward.w_1.bias | [2048] | 2048\n",
"encoder.encoders.6.feed_forward.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.6.feed_forward.w_2.bias | [256] | 256\n",
"encoder.encoders.6.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.6.feed_forward_macaron.w_1.bias | [2048] | 2048\n",
"encoder.encoders.6.feed_forward_macaron.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.6.feed_forward_macaron.w_2.bias | [256] | 256\n",
"encoder.encoders.6.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072\n",
"encoder.encoders.6.conv_module.pointwise_conv1.bias | [512] | 512\n",
"encoder.encoders.6.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n",
"encoder.encoders.6.conv_module.depthwise_conv.bias | [256] | 256\n",
"encoder.encoders.6.conv_module.norm.weight | [256] | 256\n",
"encoder.encoders.6.conv_module.norm.bias | [256] | 256\n",
"encoder.encoders.6.conv_module.norm._mean | [256] | 256\n",
"encoder.encoders.6.conv_module.norm._variance | [256] | 256\n",
"encoder.encoders.6.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n",
"encoder.encoders.6.conv_module.pointwise_conv2.bias | [256] | 256\n",
"encoder.encoders.6.norm_ff.weight | [256] | 256\n",
"encoder.encoders.6.norm_ff.bias | [256] | 256\n",
"encoder.encoders.6.norm_mha.weight | [256] | 256\n",
"encoder.encoders.6.norm_mha.bias | [256] | 256\n",
"encoder.encoders.6.norm_ff_macaron.weight | [256] | 256\n",
"encoder.encoders.6.norm_ff_macaron.bias | [256] | 256\n",
"encoder.encoders.6.norm_conv.weight | [256] | 256\n",
"encoder.encoders.6.norm_conv.bias | [256] | 256\n",
"encoder.encoders.6.norm_final.weight | [256] | 256\n",
"encoder.encoders.6.norm_final.bias | [256] | 256\n",
"encoder.encoders.6.concat_linear.weight | [512, 256] | 131072\n",
"encoder.encoders.6.concat_linear.bias | [256] | 256\n",
"encoder.encoders.7.self_attn.pos_bias_u | [4, 64] | 256\n",
"encoder.encoders.7.self_attn.pos_bias_v | [4, 64] | 256\n",
"encoder.encoders.7.self_attn.linear_q.weight | [256, 256] | 65536\n",
"encoder.encoders.7.self_attn.linear_q.bias | [256] | 256\n",
"encoder.encoders.7.self_attn.linear_k.weight | [256, 256] | 65536\n",
"encoder.encoders.7.self_attn.linear_k.bias | [256] | 256\n",
"encoder.encoders.7.self_attn.linear_v.weight | [256, 256] | 65536\n",
"encoder.encoders.7.self_attn.linear_v.bias | [256] | 256\n",
"encoder.encoders.7.self_attn.linear_out.weight | [256, 256] | 65536\n",
"encoder.encoders.7.self_attn.linear_out.bias | [256] | 256\n",
"encoder.encoders.7.self_attn.linear_pos.weight | [256, 256] | 65536\n",
"encoder.encoders.7.feed_forward.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.7.feed_forward.w_1.bias | [2048] | 2048\n",
"encoder.encoders.7.feed_forward.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.7.feed_forward.w_2.bias | [256] | 256\n",
"encoder.encoders.7.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.7.feed_forward_macaron.w_1.bias | [2048] | 2048\n",
"encoder.encoders.7.feed_forward_macaron.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.7.feed_forward_macaron.w_2.bias | [256] | 256\n",
"encoder.encoders.7.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072\n",
"encoder.encoders.7.conv_module.pointwise_conv1.bias | [512] | 512\n",
"encoder.encoders.7.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n",
"encoder.encoders.7.conv_module.depthwise_conv.bias | [256] | 256\n",
"encoder.encoders.7.conv_module.norm.weight | [256] | 256\n",
"encoder.encoders.7.conv_module.norm.bias | [256] | 256\n",
"encoder.encoders.7.conv_module.norm._mean | [256] | 256\n",
"encoder.encoders.7.conv_module.norm._variance | [256] | 256\n",
"encoder.encoders.7.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n",
"encoder.encoders.7.conv_module.pointwise_conv2.bias | [256] | 256\n",
"encoder.encoders.7.norm_ff.weight | [256] | 256\n",
"encoder.encoders.7.norm_ff.bias | [256] | 256\n",
"encoder.encoders.7.norm_mha.weight | [256] | 256\n",
"encoder.encoders.7.norm_mha.bias | [256] | 256\n",
"encoder.encoders.7.norm_ff_macaron.weight | [256] | 256\n",
"encoder.encoders.7.norm_ff_macaron.bias | [256] | 256\n",
"encoder.encoders.7.norm_conv.weight | [256] | 256\n",
"encoder.encoders.7.norm_conv.bias | [256] | 256\n",
"encoder.encoders.7.norm_final.weight | [256] | 256\n",
"encoder.encoders.7.norm_final.bias | [256] | 256\n",
"encoder.encoders.7.concat_linear.weight | [512, 256] | 131072\n",
"encoder.encoders.7.concat_linear.bias | [256] | 256\n",
"encoder.encoders.8.self_attn.pos_bias_u | [4, 64] | 256\n",
"encoder.encoders.8.self_attn.pos_bias_v | [4, 64] | 256\n",
"encoder.encoders.8.self_attn.linear_q.weight | [256, 256] | 65536\n",
"encoder.encoders.8.self_attn.linear_q.bias | [256] | 256\n",
"encoder.encoders.8.self_attn.linear_k.weight | [256, 256] | 65536\n",
"encoder.encoders.8.self_attn.linear_k.bias | [256] | 256\n",
"encoder.encoders.8.self_attn.linear_v.weight | [256, 256] | 65536\n",
"encoder.encoders.8.self_attn.linear_v.bias | [256] | 256\n",
"encoder.encoders.8.self_attn.linear_out.weight | [256, 256] | 65536\n",
"encoder.encoders.8.self_attn.linear_out.bias | [256] | 256\n",
"encoder.encoders.8.self_attn.linear_pos.weight | [256, 256] | 65536\n",
"encoder.encoders.8.feed_forward.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.8.feed_forward.w_1.bias | [2048] | 2048\n",
"encoder.encoders.8.feed_forward.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.8.feed_forward.w_2.bias | [256] | 256\n",
"encoder.encoders.8.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.8.feed_forward_macaron.w_1.bias | [2048] | 2048\n",
"encoder.encoders.8.feed_forward_macaron.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.8.feed_forward_macaron.w_2.bias | [256] | 256\n",
"encoder.encoders.8.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072\n",
"encoder.encoders.8.conv_module.pointwise_conv1.bias | [512] | 512\n",
"encoder.encoders.8.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n",
"encoder.encoders.8.conv_module.depthwise_conv.bias | [256] | 256\n",
"encoder.encoders.8.conv_module.norm.weight | [256] | 256\n",
"encoder.encoders.8.conv_module.norm.bias | [256] | 256\n",
"encoder.encoders.8.conv_module.norm._mean | [256] | 256\n",
"encoder.encoders.8.conv_module.norm._variance | [256] | 256\n",
"encoder.encoders.8.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n",
"encoder.encoders.8.conv_module.pointwise_conv2.bias | [256] | 256\n",
"encoder.encoders.8.norm_ff.weight | [256] | 256\n",
"encoder.encoders.8.norm_ff.bias | [256] | 256\n",
"encoder.encoders.8.norm_mha.weight | [256] | 256\n",
"encoder.encoders.8.norm_mha.bias | [256] | 256\n",
"encoder.encoders.8.norm_ff_macaron.weight | [256] | 256\n",
"encoder.encoders.8.norm_ff_macaron.bias | [256] | 256\n",
"encoder.encoders.8.norm_conv.weight | [256] | 256\n",
"encoder.encoders.8.norm_conv.bias | [256] | 256\n",
"encoder.encoders.8.norm_final.weight | [256] | 256\n",
"encoder.encoders.8.norm_final.bias | [256] | 256\n",
"encoder.encoders.8.concat_linear.weight | [512, 256] | 131072\n",
"encoder.encoders.8.concat_linear.bias | [256] | 256\n",
"encoder.encoders.9.self_attn.pos_bias_u | [4, 64] | 256\n",
"encoder.encoders.9.self_attn.pos_bias_v | [4, 64] | 256\n",
"encoder.encoders.9.self_attn.linear_q.weight | [256, 256] | 65536\n",
"encoder.encoders.9.self_attn.linear_q.bias | [256] | 256\n",
"encoder.encoders.9.self_attn.linear_k.weight | [256, 256] | 65536\n",
"encoder.encoders.9.self_attn.linear_k.bias | [256] | 256\n",
"encoder.encoders.9.self_attn.linear_v.weight | [256, 256] | 65536\n",
"encoder.encoders.9.self_attn.linear_v.bias | [256] | 256\n",
"encoder.encoders.9.self_attn.linear_out.weight | [256, 256] | 65536\n",
"encoder.encoders.9.self_attn.linear_out.bias | [256] | 256\n",
"encoder.encoders.9.self_attn.linear_pos.weight | [256, 256] | 65536\n",
"encoder.encoders.9.feed_forward.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.9.feed_forward.w_1.bias | [2048] | 2048\n",
"encoder.encoders.9.feed_forward.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.9.feed_forward.w_2.bias | [256] | 256\n",
"encoder.encoders.9.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.9.feed_forward_macaron.w_1.bias | [2048] | 2048\n",
"encoder.encoders.9.feed_forward_macaron.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.9.feed_forward_macaron.w_2.bias | [256] | 256\n",
"encoder.encoders.9.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072\n",
"encoder.encoders.9.conv_module.pointwise_conv1.bias | [512] | 512\n",
"encoder.encoders.9.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n",
"encoder.encoders.9.conv_module.depthwise_conv.bias | [256] | 256\n",
"encoder.encoders.9.conv_module.norm.weight | [256] | 256\n",
"encoder.encoders.9.conv_module.norm.bias | [256] | 256\n",
"encoder.encoders.9.conv_module.norm._mean | [256] | 256\n",
"encoder.encoders.9.conv_module.norm._variance | [256] | 256\n",
"encoder.encoders.9.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n",
"encoder.encoders.9.conv_module.pointwise_conv2.bias | [256] | 256\n",
"encoder.encoders.9.norm_ff.weight | [256] | 256\n",
"encoder.encoders.9.norm_ff.bias | [256] | 256\n",
"encoder.encoders.9.norm_mha.weight | [256] | 256\n",
"encoder.encoders.9.norm_mha.bias | [256] | 256\n",
"encoder.encoders.9.norm_ff_macaron.weight | [256] | 256\n",
"encoder.encoders.9.norm_ff_macaron.bias | [256] | 256\n",
"encoder.encoders.9.norm_conv.weight | [256] | 256\n",
"encoder.encoders.9.norm_conv.bias | [256] | 256\n",
"encoder.encoders.9.norm_final.weight | [256] | 256\n",
"encoder.encoders.9.norm_final.bias | [256] | 256\n",
"encoder.encoders.9.concat_linear.weight | [512, 256] | 131072\n",
"encoder.encoders.9.concat_linear.bias | [256] | 256\n",
"encoder.encoders.10.self_attn.pos_bias_u | [4, 64] | 256\n",
"encoder.encoders.10.self_attn.pos_bias_v | [4, 64] | 256\n",
"encoder.encoders.10.self_attn.linear_q.weight | [256, 256] | 65536\n",
"encoder.encoders.10.self_attn.linear_q.bias | [256] | 256\n",
"encoder.encoders.10.self_attn.linear_k.weight | [256, 256] | 65536\n",
"encoder.encoders.10.self_attn.linear_k.bias | [256] | 256\n",
"encoder.encoders.10.self_attn.linear_v.weight | [256, 256] | 65536\n",
"encoder.encoders.10.self_attn.linear_v.bias | [256] | 256\n",
"encoder.encoders.10.self_attn.linear_out.weight | [256, 256] | 65536\n",
"encoder.encoders.10.self_attn.linear_out.bias | [256] | 256\n",
"encoder.encoders.10.self_attn.linear_pos.weight | [256, 256] | 65536\n",
"encoder.encoders.10.feed_forward.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.10.feed_forward.w_1.bias | [2048] | 2048\n",
"encoder.encoders.10.feed_forward.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.10.feed_forward.w_2.bias | [256] | 256\n",
"encoder.encoders.10.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.10.feed_forward_macaron.w_1.bias | [2048] | 2048\n",
"encoder.encoders.10.feed_forward_macaron.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.10.feed_forward_macaron.w_2.bias | [256] | 256\n",
"encoder.encoders.10.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072\n",
"encoder.encoders.10.conv_module.pointwise_conv1.bias | [512] | 512\n",
"encoder.encoders.10.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n",
"encoder.encoders.10.conv_module.depthwise_conv.bias | [256] | 256\n",
"encoder.encoders.10.conv_module.norm.weight | [256] | 256\n",
"encoder.encoders.10.conv_module.norm.bias | [256] | 256\n",
"encoder.encoders.10.conv_module.norm._mean | [256] | 256\n",
"encoder.encoders.10.conv_module.norm._variance | [256] | 256\n",
"encoder.encoders.10.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n",
"encoder.encoders.10.conv_module.pointwise_conv2.bias | [256] | 256\n",
"encoder.encoders.10.norm_ff.weight | [256] | 256\n",
"encoder.encoders.10.norm_ff.bias | [256] | 256\n",
"encoder.encoders.10.norm_mha.weight | [256] | 256\n",
"encoder.encoders.10.norm_mha.bias | [256] | 256\n",
"encoder.encoders.10.norm_ff_macaron.weight | [256] | 256\n",
"encoder.encoders.10.norm_ff_macaron.bias | [256] | 256\n",
"encoder.encoders.10.norm_conv.weight | [256] | 256\n",
"encoder.encoders.10.norm_conv.bias | [256] | 256\n",
"encoder.encoders.10.norm_final.weight | [256] | 256\n",
"encoder.encoders.10.norm_final.bias | [256] | 256\n",
"encoder.encoders.10.concat_linear.weight | [512, 256] | 131072\n",
"encoder.encoders.10.concat_linear.bias | [256] | 256\n",
"encoder.encoders.11.self_attn.pos_bias_u | [4, 64] | 256\n",
"encoder.encoders.11.self_attn.pos_bias_v | [4, 64] | 256\n",
"encoder.encoders.11.self_attn.linear_q.weight | [256, 256] | 65536\n",
"encoder.encoders.11.self_attn.linear_q.bias | [256] | 256\n",
"encoder.encoders.11.self_attn.linear_k.weight | [256, 256] | 65536\n",
"encoder.encoders.11.self_attn.linear_k.bias | [256] | 256\n",
"encoder.encoders.11.self_attn.linear_v.weight | [256, 256] | 65536\n",
"encoder.encoders.11.self_attn.linear_v.bias | [256] | 256\n",
"encoder.encoders.11.self_attn.linear_out.weight | [256, 256] | 65536\n",
"encoder.encoders.11.self_attn.linear_out.bias | [256] | 256\n",
"encoder.encoders.11.self_attn.linear_pos.weight | [256, 256] | 65536\n",
"encoder.encoders.11.feed_forward.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.11.feed_forward.w_1.bias | [2048] | 2048\n",
"encoder.encoders.11.feed_forward.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.11.feed_forward.w_2.bias | [256] | 256\n",
"encoder.encoders.11.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n",
"encoder.encoders.11.feed_forward_macaron.w_1.bias | [2048] | 2048\n",
"encoder.encoders.11.feed_forward_macaron.w_2.weight | [2048, 256] | 524288\n",
"encoder.encoders.11.feed_forward_macaron.w_2.bias | [256] | 256\n",
"encoder.encoders.11.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072\n",
"encoder.encoders.11.conv_module.pointwise_conv1.bias | [512] | 512\n",
"encoder.encoders.11.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n",
"encoder.encoders.11.conv_module.depthwise_conv.bias | [256] | 256\n",
"encoder.encoders.11.conv_module.norm.weight | [256] | 256\n",
"encoder.encoders.11.conv_module.norm.bias | [256] | 256\n",
"encoder.encoders.11.conv_module.norm._mean | [256] | 256\n",
"encoder.encoders.11.conv_module.norm._variance | [256] | 256\n",
"encoder.encoders.11.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n",
"encoder.encoders.11.conv_module.pointwise_conv2.bias | [256] | 256\n",
"encoder.encoders.11.norm_ff.weight | [256] | 256\n",
"encoder.encoders.11.norm_ff.bias | [256] | 256\n",
"encoder.encoders.11.norm_mha.weight | [256] | 256\n",
"encoder.encoders.11.norm_mha.bias | [256] | 256\n",
"encoder.encoders.11.norm_ff_macaron.weight | [256] | 256\n",
"encoder.encoders.11.norm_ff_macaron.bias | [256] | 256\n",
"encoder.encoders.11.norm_conv.weight | [256] | 256\n",
"encoder.encoders.11.norm_conv.bias | [256] | 256\n",
"encoder.encoders.11.norm_final.weight | [256] | 256\n",
"encoder.encoders.11.norm_final.bias | [256] | 256\n",
"encoder.encoders.11.concat_linear.weight | [512, 256] | 131072\n",
"encoder.encoders.11.concat_linear.bias | [256] | 256\n",
"decoder.embed.0.weight | [4223, 256] | 1081088\n",
"decoder.after_norm.weight | [256] | 256\n",
"decoder.after_norm.bias | [256] | 256\n",
"decoder.output_layer.weight | [256, 4223] | 1081088\n",
"decoder.output_layer.bias | [4223] | 4223\n",
"decoder.decoders.0.self_attn.linear_q.weight | [256, 256] | 65536\n",
"decoder.decoders.0.self_attn.linear_q.bias | [256] | 256\n",
"decoder.decoders.0.self_attn.linear_k.weight | [256, 256] | 65536\n",
"decoder.decoders.0.self_attn.linear_k.bias | [256] | 256\n",
"decoder.decoders.0.self_attn.linear_v.weight | [256, 256] | 65536\n",
"decoder.decoders.0.self_attn.linear_v.bias | [256] | 256\n",
"decoder.decoders.0.self_attn.linear_out.weight | [256, 256] | 65536\n",
"decoder.decoders.0.self_attn.linear_out.bias | [256] | 256\n",
"decoder.decoders.0.src_attn.linear_q.weight | [256, 256] | 65536\n",
"decoder.decoders.0.src_attn.linear_q.bias | [256] | 256\n",
"decoder.decoders.0.src_attn.linear_k.weight | [256, 256] | 65536\n",
"decoder.decoders.0.src_attn.linear_k.bias | [256] | 256\n",
"decoder.decoders.0.src_attn.linear_v.weight | [256, 256] | 65536\n",
"decoder.decoders.0.src_attn.linear_v.bias | [256] | 256\n",
"decoder.decoders.0.src_attn.linear_out.weight | [256, 256] | 65536\n",
"decoder.decoders.0.src_attn.linear_out.bias | [256] | 256\n",
"decoder.decoders.0.feed_forward.w_1.weight | [256, 2048] | 524288\n",
"decoder.decoders.0.feed_forward.w_1.bias | [2048] | 2048\n",
"decoder.decoders.0.feed_forward.w_2.weight | [2048, 256] | 524288\n",
"decoder.decoders.0.feed_forward.w_2.bias | [256] | 256\n",
"decoder.decoders.0.norm1.weight | [256] | 256\n",
"decoder.decoders.0.norm1.bias | [256] | 256\n",
"decoder.decoders.0.norm2.weight | [256] | 256\n",
"decoder.decoders.0.norm2.bias | [256] | 256\n",
"decoder.decoders.0.norm3.weight | [256] | 256\n",
"decoder.decoders.0.norm3.bias | [256] | 256\n",
"decoder.decoders.0.concat_linear1.weight | [512, 256] | 131072\n",
"decoder.decoders.0.concat_linear1.bias | [256] | 256\n",
"decoder.decoders.0.concat_linear2.weight | [512, 256] | 131072\n",
"decoder.decoders.0.concat_linear2.bias | [256] | 256\n",
"decoder.decoders.1.self_attn.linear_q.weight | [256, 256] | 65536\n",
"decoder.decoders.1.self_attn.linear_q.bias | [256] | 256\n",
"decoder.decoders.1.self_attn.linear_k.weight | [256, 256] | 65536\n",
"decoder.decoders.1.self_attn.linear_k.bias | [256] | 256\n",
"decoder.decoders.1.self_attn.linear_v.weight | [256, 256] | 65536\n",
"decoder.decoders.1.self_attn.linear_v.bias | [256] | 256\n",
"decoder.decoders.1.self_attn.linear_out.weight | [256, 256] | 65536\n",
"decoder.decoders.1.self_attn.linear_out.bias | [256] | 256\n",
"decoder.decoders.1.src_attn.linear_q.weight | [256, 256] | 65536\n",
"decoder.decoders.1.src_attn.linear_q.bias | [256] | 256\n",
"decoder.decoders.1.src_attn.linear_k.weight | [256, 256] | 65536\n",
"decoder.decoders.1.src_attn.linear_k.bias | [256] | 256\n",
"decoder.decoders.1.src_attn.linear_v.weight | [256, 256] | 65536\n",
"decoder.decoders.1.src_attn.linear_v.bias | [256] | 256\n",
"decoder.decoders.1.src_attn.linear_out.weight | [256, 256] | 65536\n",
"decoder.decoders.1.src_attn.linear_out.bias | [256] | 256\n",
"decoder.decoders.1.feed_forward.w_1.weight | [256, 2048] | 524288\n",
"decoder.decoders.1.feed_forward.w_1.bias | [2048] | 2048\n",
"decoder.decoders.1.feed_forward.w_2.weight | [2048, 256] | 524288\n",
"decoder.decoders.1.feed_forward.w_2.bias | [256] | 256\n",
"decoder.decoders.1.norm1.weight | [256] | 256\n",
"decoder.decoders.1.norm1.bias | [256] | 256\n",
"decoder.decoders.1.norm2.weight | [256] | 256\n",
"decoder.decoders.1.norm2.bias | [256] | 256\n",
"decoder.decoders.1.norm3.weight | [256] | 256\n",
"decoder.decoders.1.norm3.bias | [256] | 256\n",
"decoder.decoders.1.concat_linear1.weight | [512, 256] | 131072\n",
"decoder.decoders.1.concat_linear1.bias | [256] | 256\n",
"decoder.decoders.1.concat_linear2.weight | [512, 256] | 131072\n",
"decoder.decoders.1.concat_linear2.bias | [256] | 256\n",
"decoder.decoders.2.self_attn.linear_q.weight | [256, 256] | 65536\n",
"decoder.decoders.2.self_attn.linear_q.bias | [256] | 256\n",
"decoder.decoders.2.self_attn.linear_k.weight | [256, 256] | 65536\n",
"decoder.decoders.2.self_attn.linear_k.bias | [256] | 256\n",
"decoder.decoders.2.self_attn.linear_v.weight | [256, 256] | 65536\n",
"decoder.decoders.2.self_attn.linear_v.bias | [256] | 256\n",
"decoder.decoders.2.self_attn.linear_out.weight | [256, 256] | 65536\n",
"decoder.decoders.2.self_attn.linear_out.bias | [256] | 256\n",
"decoder.decoders.2.src_attn.linear_q.weight | [256, 256] | 65536\n",
"decoder.decoders.2.src_attn.linear_q.bias | [256] | 256\n",
"decoder.decoders.2.src_attn.linear_k.weight | [256, 256] | 65536\n",
"decoder.decoders.2.src_attn.linear_k.bias | [256] | 256\n",
"decoder.decoders.2.src_attn.linear_v.weight | [256, 256] | 65536\n",
"decoder.decoders.2.src_attn.linear_v.bias | [256] | 256\n",
"decoder.decoders.2.src_attn.linear_out.weight | [256, 256] | 65536\n",
"decoder.decoders.2.src_attn.linear_out.bias | [256] | 256\n",
"decoder.decoders.2.feed_forward.w_1.weight | [256, 2048] | 524288\n",
"decoder.decoders.2.feed_forward.w_1.bias | [2048] | 2048\n",
"decoder.decoders.2.feed_forward.w_2.weight | [2048, 256] | 524288\n",
"decoder.decoders.2.feed_forward.w_2.bias | [256] | 256\n",
"decoder.decoders.2.norm1.weight | [256] | 256\n",
"decoder.decoders.2.norm1.bias | [256] | 256\n",
"decoder.decoders.2.norm2.weight | [256] | 256\n",
"decoder.decoders.2.norm2.bias | [256] | 256\n",
"decoder.decoders.2.norm3.weight | [256] | 256\n",
"decoder.decoders.2.norm3.bias | [256] | 256\n",
"decoder.decoders.2.concat_linear1.weight | [512, 256] | 131072\n",
"decoder.decoders.2.concat_linear1.bias | [256] | 256\n",
"decoder.decoders.2.concat_linear2.weight | [512, 256] | 131072\n",
"decoder.decoders.2.concat_linear2.bias | [256] | 256\n",
"decoder.decoders.3.self_attn.linear_q.weight | [256, 256] | 65536\n",
"decoder.decoders.3.self_attn.linear_q.bias | [256] | 256\n",
"decoder.decoders.3.self_attn.linear_k.weight | [256, 256] | 65536\n",
"decoder.decoders.3.self_attn.linear_k.bias | [256] | 256\n",
"decoder.decoders.3.self_attn.linear_v.weight | [256, 256] | 65536\n",
"decoder.decoders.3.self_attn.linear_v.bias | [256] | 256\n",
"decoder.decoders.3.self_attn.linear_out.weight | [256, 256] | 65536\n",
"decoder.decoders.3.self_attn.linear_out.bias | [256] | 256\n",
"decoder.decoders.3.src_attn.linear_q.weight | [256, 256] | 65536\n",
"decoder.decoders.3.src_attn.linear_q.bias | [256] | 256\n",
"decoder.decoders.3.src_attn.linear_k.weight | [256, 256] | 65536\n",
"decoder.decoders.3.src_attn.linear_k.bias | [256] | 256\n",
"decoder.decoders.3.src_attn.linear_v.weight | [256, 256] | 65536\n",
"decoder.decoders.3.src_attn.linear_v.bias | [256] | 256\n",
"decoder.decoders.3.src_attn.linear_out.weight | [256, 256] | 65536\n",
"decoder.decoders.3.src_attn.linear_out.bias | [256] | 256\n",
"decoder.decoders.3.feed_forward.w_1.weight | [256, 2048] | 524288\n",
"decoder.decoders.3.feed_forward.w_1.bias | [2048] | 2048\n",
"decoder.decoders.3.feed_forward.w_2.weight | [2048, 256] | 524288\n",
"decoder.decoders.3.feed_forward.w_2.bias | [256] | 256\n",
"decoder.decoders.3.norm1.weight | [256] | 256\n",
"decoder.decoders.3.norm1.bias | [256] | 256\n",
"decoder.decoders.3.norm2.weight | [256] | 256\n",
"decoder.decoders.3.norm2.bias | [256] | 256\n",
"decoder.decoders.3.norm3.weight | [256] | 256\n",
"decoder.decoders.3.norm3.bias | [256] | 256\n",
"decoder.decoders.3.concat_linear1.weight | [512, 256] | 131072\n",
"decoder.decoders.3.concat_linear1.bias | [256] | 256\n",
"decoder.decoders.3.concat_linear2.weight | [512, 256] | 131072\n",
"decoder.decoders.3.concat_linear2.bias | [256] | 256\n",
"decoder.decoders.4.self_attn.linear_q.weight | [256, 256] | 65536\n",
"decoder.decoders.4.self_attn.linear_q.bias | [256] | 256\n",
"decoder.decoders.4.self_attn.linear_k.weight | [256, 256] | 65536\n",
"decoder.decoders.4.self_attn.linear_k.bias | [256] | 256\n",
"decoder.decoders.4.self_attn.linear_v.weight | [256, 256] | 65536\n",
"decoder.decoders.4.self_attn.linear_v.bias | [256] | 256\n",
"decoder.decoders.4.self_attn.linear_out.weight | [256, 256] | 65536\n",
"decoder.decoders.4.self_attn.linear_out.bias | [256] | 256\n",
"decoder.decoders.4.src_attn.linear_q.weight | [256, 256] | 65536\n",
"decoder.decoders.4.src_attn.linear_q.bias | [256] | 256\n",
"decoder.decoders.4.src_attn.linear_k.weight | [256, 256] | 65536\n",
"decoder.decoders.4.src_attn.linear_k.bias | [256] | 256\n",
"decoder.decoders.4.src_attn.linear_v.weight | [256, 256] | 65536\n",
"decoder.decoders.4.src_attn.linear_v.bias | [256] | 256\n",
"decoder.decoders.4.src_attn.linear_out.weight | [256, 256] | 65536\n",
"decoder.decoders.4.src_attn.linear_out.bias | [256] | 256\n",
"decoder.decoders.4.feed_forward.w_1.weight | [256, 2048] | 524288\n",
"decoder.decoders.4.feed_forward.w_1.bias | [2048] | 2048\n",
"decoder.decoders.4.feed_forward.w_2.weight | [2048, 256] | 524288\n",
"decoder.decoders.4.feed_forward.w_2.bias | [256] | 256\n",
"decoder.decoders.4.norm1.weight | [256] | 256\n",
"decoder.decoders.4.norm1.bias | [256] | 256\n",
"decoder.decoders.4.norm2.weight | [256] | 256\n",
"decoder.decoders.4.norm2.bias | [256] | 256\n",
"decoder.decoders.4.norm3.weight | [256] | 256\n",
"decoder.decoders.4.norm3.bias | [256] | 256\n",
"decoder.decoders.4.concat_linear1.weight | [512, 256] | 131072\n",
"decoder.decoders.4.concat_linear1.bias | [256] | 256\n",
"decoder.decoders.4.concat_linear2.weight | [512, 256] | 131072\n",
"decoder.decoders.4.concat_linear2.bias | [256] | 256\n",
"decoder.decoders.5.self_attn.linear_q.weight | [256, 256] | 65536\n",
"decoder.decoders.5.self_attn.linear_q.bias | [256] | 256\n",
"decoder.decoders.5.self_attn.linear_k.weight | [256, 256] | 65536\n",
"decoder.decoders.5.self_attn.linear_k.bias | [256] | 256\n",
"decoder.decoders.5.self_attn.linear_v.weight | [256, 256] | 65536\n",
"decoder.decoders.5.self_attn.linear_v.bias | [256] | 256\n",
"decoder.decoders.5.self_attn.linear_out.weight | [256, 256] | 65536\n",
"decoder.decoders.5.self_attn.linear_out.bias | [256] | 256\n",
"decoder.decoders.5.src_attn.linear_q.weight | [256, 256] | 65536\n",
"decoder.decoders.5.src_attn.linear_q.bias | [256] | 256\n",
"decoder.decoders.5.src_attn.linear_k.weight | [256, 256] | 65536\n",
"decoder.decoders.5.src_attn.linear_k.bias | [256] | 256\n",
"decoder.decoders.5.src_attn.linear_v.weight | [256, 256] | 65536\n",
"decoder.decoders.5.src_attn.linear_v.bias | [256] | 256\n",
"decoder.decoders.5.src_attn.linear_out.weight | [256, 256] | 65536\n",
"decoder.decoders.5.src_attn.linear_out.bias | [256] | 256\n",
"decoder.decoders.5.feed_forward.w_1.weight | [256, 2048] | 524288\n",
"decoder.decoders.5.feed_forward.w_1.bias | [2048] | 2048\n",
"decoder.decoders.5.feed_forward.w_2.weight | [2048, 256] | 524288\n",
"decoder.decoders.5.feed_forward.w_2.bias | [256] | 256\n",
"decoder.decoders.5.norm1.weight | [256] | 256\n",
"decoder.decoders.5.norm1.bias | [256] | 256\n",
"decoder.decoders.5.norm2.weight | [256] | 256\n",
"decoder.decoders.5.norm2.bias | [256] | 256\n",
"decoder.decoders.5.norm3.weight | [256] | 256\n",
"decoder.decoders.5.norm3.bias | [256] | 256\n",
"decoder.decoders.5.concat_linear1.weight | [512, 256] | 131072\n",
"decoder.decoders.5.concat_linear1.bias | [256] | 256\n",
"decoder.decoders.5.concat_linear2.weight | [512, 256] | 131072\n",
"decoder.decoders.5.concat_linear2.bias | [256] | 256\n",
"ctc.ctc_lo.weight | [256, 4223] | 1081088\n",
"ctc.ctc_lo.bias | [4223] | 4223\n",
"Total parameters: 689, 49347742 elements.\n"
]
}
],
"source": [
"summary(model)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ranking-beads",
"metadata": {},
"outputs": [],
"source": [
"total_loss, attention_loss, ctc_loss = model(self.audio, self.audio_len,\n",
" self.text, self.text_len)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
......@@ -77,15 +77,19 @@ class FeatureNormalizer(object):
:param filepath: File to write mean and stddev.
:type filepath: str
"""
np.savez(filepath, mean=self._mean, std=self._std)
np.savez(filepath, mean=self._mean, istd=self._istd)
def _read_mean_std_from_file(self, filepath, eps=1e-20):
"""Load mean and std from file."""
mean, std = load_cmvn(filepath, filetype='npz')
mean, istd = load_cmvn(filepath, filetype='npz')
self._mean = mean.T
self._istd = 1.0 / std.T
self._istd = istd.T
def _compute_mean_std(self, manifest_path, featurize_func, num_samples):
def _compute_mean_std(self,
manifest_path,
featurize_func,
num_samples,
eps=1e-20):
"""Compute mean and std from randomly sampled instances."""
manifest = read_manifest(manifest_path)
if num_samples == -1:
......@@ -98,4 +102,6 @@ class FeatureNormalizer(object):
featurize_func(AudioSegment.from_file(instance["feat"])))
features = np.hstack(features) #(D, T)
self._mean = np.mean(features, axis=1).reshape([1, -1]) #(1, D)
self._std = np.std(features, axis=1).reshape([1, -1]) #(1, D)
std = np.std(features, axis=1).reshape([1, -1]) #(1, D)
std = np.clip(std, eps, None)
self._istd = 1.0 / std
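For reference, a minimal standalone sketch of the new mean/istd bookkeeping (numpy only; compute_mean_istd, write_stats and read_stats are illustrative names, not this repo's API):

import numpy as np

def compute_mean_istd(features, eps=1e-20):
    # features: (D, T) matrix of stacked feature frames
    mean = np.mean(features, axis=1).reshape([1, -1])  # (1, D)
    std = np.std(features, axis=1).reshape([1, -1])    # (1, D)
    std = np.clip(std, eps, None)                      # guard against zero std
    return mean, 1.0 / std                             # keep istd, not std

def write_stats(filepath, mean, istd):
    # same npz layout as FeatureNormalizer now writes: keys "mean" and "istd"
    np.savez(filepath, mean=mean, istd=istd)

def read_stats(filepath):
    npzfile = np.load(filepath)
    return npzfile["mean"], npzfile["istd"]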
......@@ -238,10 +238,8 @@ def _load_kaldi_cmvn(kaldi_cmvn_file):
def _load_npz_cmvn(npz_cmvn_file, eps=1e-20):
npzfile = np.load(npz_cmvn_file)
means = npzfile["mean"] #(1, D)
std = npzfile["std"] #(1, D)
std = np.clip(std, eps, None)
variance = 1.0 / std
cmvn = np.array([means, variance])
istd = npzfile["istd"] #(1, D)
cmvn = np.array([means, istd])
return cmvn
......
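A hedged sketch of how the (mean, istd) pair loaded above is typically applied to features; apply_cmvn is an illustrative helper, not code from this repo:

import numpy as np

def apply_cmvn(feat, cmvn):
    # feat: (T, D) frames; cmvn: stacked array [mean, istd], each entry (1, D)
    mean, istd = cmvn[0], cmvn[1]
    return (feat - mean) * istd  # subtract mean, scale by inverse std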
......@@ -25,7 +25,7 @@ __all__ = [
def sequence_mask(x_len, max_len=None, dtype='float32'):
"""[summary]
"""batch sequence mask.
Args:
x_len ([paddle.Tensor]): xs length, [B]
......
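The batch sequence mask semantics, sketched in plain numpy for clarity (sequence_mask_np is only illustrative; the actual implementation is the paddle function documented above):

import numpy as np

def sequence_mask_np(x_len, max_len=None, dtype='float32'):
    # x_len: (B,) lengths -> mask of shape (B, T), 1.0 for valid positions
    x_len = np.asarray(x_len)
    max_len = max_len or int(x_len.max())
    mask = np.arange(max_len)[None, :] < x_len[:, None]
    return mask.astype(dtype)

# sequence_mask_np([2, 3]) -> [[1., 1., 0.], [1., 1., 1.]]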
......@@ -22,8 +22,6 @@ __all__ = [
def summary(layer: nn.Layer, print_func=print):
num_params = num_elements = 0
if print_func:
print_func(f"{layer.__class__.__name__} summary:")
for name, param in layer.state_dict().items():
if print_func:
print_func(
......@@ -31,9 +29,7 @@ def summary(layer: nn.Layer, print_func=print):
num_elements += np.prod(param.shape)
num_params += 1
if print_func:
print_func(
f"{layer.__class__.__name__} has {num_params} parameters, {num_elements} elements."
)
print_func(f"Total parameters: {num_params}, {num_elements} elements.")
def gradient_norm(layer: nn.Layer):
......@@ -45,25 +41,6 @@ def gradient_norm(layer: nn.Layer):
return grad_norm_dict
def recursively_remove_weight_norm(layer: nn.Layer):
for layer in layer.sublayers():
try:
nn.utils.remove_weight_norm(layer)
except ValueError as e:
# there is no weight norm hook in this layer
pass
def freeze(layer: nn.Layer):
for param in layer.parameters():
param.trainable = False
def unfreeze(layer: nn.Layer):
for param in layer.parameters():
param.trainable = True
def print_grads(model, print_func=print):
if print_func is None:
return
......@@ -75,12 +52,32 @@ def print_grads(model, print_func=print):
def print_params(model, print_func=print):
if print_func is None:
return
total = 0.0
num_params = 0.0
for n, p in model.named_parameters():
msg = f"param: {n}: shape: {p.shape} stop_grad: {p.stop_gradient}"
msg = f"{n} | {p.shape} | {np.prod(p.shape)} | {not p.stop_gradient}"
total += np.prod(p.shape)
num_params += 1
if print_func:
print_func(msg)
if print_func:
print_func(f"Total parameters: {total}!")
print_func(f"Total parameters: {num_params}, {total} elements.")
def recursively_remove_weight_norm(layer: nn.Layer):
for layer in layer.sublayers():
try:
nn.utils.remove_weight_norm(layer)
except ValueError as e:
# there is no weight norm hook in this layer
pass
def freeze(layer: nn.Layer):
for param in layer.parameters():
param.trainable = False
def unfreeze(layer: nn.Layer):
for param in layer.parameters():
param.trainable = True
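A self-contained sketch that mirrors the reworked print_params report (report_params is illustrative, not the project function):

import numpy as np
import paddle.nn as nn

def report_params(model, print_func=print):
    # one line per parameter: name | shape | numel | trainable, then a total line
    total, count = 0, 0
    for name, p in model.named_parameters():
        numel = int(np.prod(p.shape))
        total += numel
        count += 1
        print_func(f"{name} | {p.shape} | {numel} | {not p.stop_gradient}")
    print_func(f"Total parameters: {count}, {total} elements.")

report_params(nn.Linear(4, 2))
# weight | [4, 2] | 8 | True
# bias | [2] | 2 | True
# Total parameters: 2, 10 elements.

Note that summary() walks layer.state_dict() while print_params() walks named_parameters(), so their totals can differ; the two notebook cells above report 689 entries / 49347742 elements versus 687 / 49347582 for the same model.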