提交 20f19768 编写于 作者: H Hui Zhang

add notebook test

上级 c607bff2
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "academic-surname",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/fluid/layers/utils.py:26: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
" def convert_to_list(value, n, name, dtype=np.int):\n"
]
}
],
"source": [
"import paddle\n",
"from paddle import nn"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "fundamental-treasure",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
" and should_run_async(code)\n"
]
}
],
"source": [
"L = nn.Linear(256, 2048)\n",
"L2 = nn.Linear(2048, 256)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "consolidated-elephant",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import torch\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "moderate-noise",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"float64\n",
"Tensor(shape=[2, 51, 256], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
" [[[-0.03137276, 0.75036579, -0.62955737, ..., -0.39516482, 2.41965628, 0.19466873],\n",
" [ 0.55916852, 1.13357353, 0.28754908, ..., 0.28860641, 0.48257691, -1.07664418],\n",
" [-0.27433595, -0.05911482, 0.04942252, ..., 0.46596146, 1.24395037, -1.98374581],\n",
" ...,\n",
" [-0.45322138, 0.51459873, 0.28475651, ..., -0.90797561, -0.80436397, -2.30388594],\n",
" [ 0.20310247, 1.90435207, -1.02483511, ..., -1.59850407, -0.30733466, 0.49769276],\n",
" [-2.63085651, -0.52244109, 0.32019949, ..., 1.10662329, -0.55995786, -0.36770794]],\n",
"\n",
" [[-1.78831303, 2.24759626, 0.41386250, ..., -0.30020580, -0.16084948, 0.93251175],\n",
" [ 0.03264519, -0.92942363, 1.58523536, ..., 1.23681784, -0.94711000, 0.63553023],\n",
" [-0.19725564, -2.38587499, -0.29334834, ..., 0.83498263, -0.58492625, 0.58732986],\n",
" ...,\n",
" [-0.61646742, -1.02978027, 0.45410269, ..., 0.87052751, -0.20801133, 2.17943859],\n",
" [-0.67230755, -0.79410625, -0.13054833, ..., -1.18138039, -0.47578079, -0.22610545],\n",
" [ 2.57333422, 0.63872230, 0.70852041, ..., -0.44040251, -0.33339104, -0.24722832]]])\n",
"tensor([[[-0.0314, 0.7504, -0.6296, ..., -0.3952, 2.4197, 0.1947],\n",
" [ 0.5592, 1.1336, 0.2875, ..., 0.2886, 0.4826, -1.0766],\n",
" [-0.2743, -0.0591, 0.0494, ..., 0.4660, 1.2440, -1.9837],\n",
" ...,\n",
" [-0.4532, 0.5146, 0.2848, ..., -0.9080, -0.8044, -2.3039],\n",
" [ 0.2031, 1.9044, -1.0248, ..., -1.5985, -0.3073, 0.4977],\n",
" [-2.6309, -0.5224, 0.3202, ..., 1.1066, -0.5600, -0.3677]],\n",
"\n",
" [[-1.7883, 2.2476, 0.4139, ..., -0.3002, -0.1608, 0.9325],\n",
" [ 0.0326, -0.9294, 1.5852, ..., 1.2368, -0.9471, 0.6355],\n",
" [-0.1973, -2.3859, -0.2933, ..., 0.8350, -0.5849, 0.5873],\n",
" ...,\n",
" [-0.6165, -1.0298, 0.4541, ..., 0.8705, -0.2080, 2.1794],\n",
" [-0.6723, -0.7941, -0.1305, ..., -1.1814, -0.4758, -0.2261],\n",
" [ 2.5733, 0.6387, 0.7085, ..., -0.4404, -0.3334, -0.2472]]])\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
" and should_run_async(code)\n"
]
}
],
"source": [
"x = np.random.randn(2, 51, 256)\n",
"print(x.dtype)\n",
"px = paddle.to_tensor(x, dtype='float32')\n",
"tx = torch.tensor(x, dtype=torch.float32)\n",
"print(px)\n",
"print(tx)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cooked-progressive",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 5,
"id": "mechanical-prisoner",
"metadata": {},
"outputs": [],
"source": [
"data = np.load('enc_0_ff_out.npz', allow_pickle=True)\n",
"t_norm_ff = data['norm_ff']\n",
"t_ff_out = data['ff_out']\n",
"t_ff_l_x = data['ff_l_x']\n",
"t_ff_l_a_x = data['ff_l_a_x']\n",
"t_ff_l_a_l_x = data['ff_l_a_l_x']\n",
"t_ps = data['ps']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "indie-marriage",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 7,
"id": "assured-zambia",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n",
"True\n",
"True\n",
"True\n"
]
}
],
"source": [
"L.set_state_dict({'weight': t_ps[0].T, 'bias': t_ps[1]})\n",
"L2.set_state_dict({'weight': t_ps[2].T, 'bias': t_ps[3]})\n",
"\n",
"ps = []\n",
"for n, p in L.named_parameters():\n",
" ps.append(p)\n",
"\n",
"for n, p in L2.state_dict().items():\n",
" ps.append(p)\n",
" \n",
"for p, tp in zip(ps, t_ps):\n",
" print(np.allclose(p.numpy(), tp.T))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "committed-jacob",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "extreme-traffic",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "optimum-milwaukee",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 11,
"id": "viral-indian",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n",
"True\n",
"True\n",
"True\n"
]
}
],
"source": [
"# data = np.load('enc_0_ff_out.npz', allow_pickle=True)\n",
"# t_norm_ff = data['norm_ff']\n",
"# t_ff_out = data['ff_out']\n",
"# t_ff_l_x = data['ff_l_x']\n",
"# t_ff_l_a_x = data['ff_l_a_x']\n",
"# t_ff_l_a_l_x = data['ff_l_a_l_x']\n",
"# t_ps = data['ps']\n",
"TL = torch.nn.Linear(256, 2048)\n",
"TL2 = torch.nn.Linear(2048, 256)\n",
"TL.load_state_dict({'weight': torch.tensor(t_ps[0]), 'bias': torch.tensor(t_ps[1])})\n",
"TL2.load_state_dict({'weight': torch.tensor(t_ps[2]), 'bias': torch.tensor(t_ps[3])})\n",
"\n",
"# for n, p in TL.named_parameters():\n",
"# print(n, p)\n",
"# for n, p in TL2.named_parameters():\n",
"# print(n, p)\n",
"\n",
"ps = []\n",
"for n, p in TL.state_dict().items():\n",
" ps.append(p.data.numpy())\n",
" \n",
"for n, p in TL2.state_dict().items():\n",
" ps.append(p.data.numpy())\n",
" \n",
"for p, tp in zip(ps, t_ps):\n",
" print(np.allclose(p, tp))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "skilled-vietnamese",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[-0.25528666 -0.9090747 0.12996 ... 0.02552819 0.37376517\n",
" -0.558986 ]\n",
" [-0.45657372 0.23811203 0.33472425 ... 1.0797666 -0.7263612\n",
" 0.31549692]]\n",
"[[-0.25528657 -0.9090746 0.12996009 ... 0.02552832 0.37376505\n",
" -0.5589858 ]\n",
" [-0.45657367 0.23811209 0.33472428 ... 1.0797666 -0.7263612\n",
" 0.31549698]]\n",
"True\n",
"False\n"
]
}
],
"source": [
"y = L(px)\n",
"print(y.numpy())\n",
"\n",
"ty = TL(tx)\n",
"print(ty.data.numpy())\n",
"print(np.allclose(px.numpy(), tx.detach().numpy()))\n",
"print(np.allclose(y.numpy(), ty.detach().numpy()))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "incorrect-allah",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "prostate-cameroon",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 14,
"id": "governmental-surge",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 0.07453135 0.0698561 0.6273111 ... 0.5845924 -0.65527105\n",
" 0.5881643 ]\n",
" [ 0.3902049 -0.17455879 -1.1802813 ... -0.36912322 0.55681896\n",
" -0.11917676]]\n",
"[[ 0.07453132 0.06985616 0.62731117 ... 0.5845925 -0.65527105\n",
" 0.5881642 ]\n",
" [ 0.39020485 -0.17455864 -1.1802814 ... -0.3691232 0.556819\n",
" -0.11917675]]\n",
"True\n",
"False\n",
"True\n"
]
}
],
"source": [
"x = np.random.randn(2, 256)\n",
"px = paddle.to_tensor(x, dtype='float32')\n",
"tx = torch.tensor(x, dtype=torch.float32)\n",
"y = L(px)\n",
"print(y.numpy())\n",
"ty = TL(tx)\n",
"print(ty.data.numpy())\n",
"print(np.allclose(px.numpy(), tx.detach().numpy()))\n",
"print(np.allclose(y.numpy(), ty.detach().numpy()))\n",
"print(np.allclose(y.numpy(), ty.detach().numpy(), atol=1e-5))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "confidential-jacket",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "improved-civilization",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 32,
"id": "academic-surname",
"metadata": {},
"outputs": [],
"source": [
"import paddle\n",
"from paddle import nn"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "fundamental-treasure",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Parameter containing:\n",
"Tensor(shape=[256], dtype=float32, place=CUDAPlace(0), stop_gradient=False,\n",
" [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])\n",
"Parameter containing:\n",
"Tensor(shape=[256], dtype=float32, place=CUDAPlace(0), stop_gradient=False,\n",
" [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])\n"
]
}
],
"source": [
"L = nn.LayerNorm(256, epsilon=1e-12)\n",
"for p in L.parameters():\n",
" print(p)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "consolidated-elephant",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "moderate-noise",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"float64\n"
]
}
],
"source": [
"x = np.random.randn(2, 51, 256)\n",
"print(x.dtype)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "cooked-progressive",
"metadata": {},
"outputs": [],
"source": [
"y = L(paddle.to_tensor(x, dtype='float32'))"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "optimum-milwaukee",
"metadata": {},
"outputs": [],
"source": [
"import torch"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "viral-indian",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Parameter containing:\n",
"tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1.], requires_grad=True)\n",
"Parameter containing:\n",
"tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" requires_grad=True)\n"
]
}
],
"source": [
"TL = torch.nn.LayerNorm(256, eps=1e-12)\n",
"for p in TL.parameters():\n",
" print(p)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "skilled-vietnamese",
"metadata": {},
"outputs": [],
"source": [
"ty = TL(torch.tensor(x, dtype=torch.float32))"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "incorrect-allah",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.allclose(y.numpy(), ty.detach().numpy())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "prostate-cameroon",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 52,
"id": "governmental-surge",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = np.random.randn(2, 256)\n",
"y = L(paddle.to_tensor(x, dtype='float32'))\n",
"ty = TL(torch.tensor(x, dtype=torch.float32))\n",
"np.allclose(y.numpy(), ty.detach().numpy())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "confidential-jacket",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 29,
"id": "designing-borough",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 0.0000000e+00 0.0000000e+00 0.0000000e+00 ... 0.0000000e+00\n",
" 0.0000000e+00 0.0000000e+00]\n",
" [ 8.4147096e-01 8.0196178e-01 7.6172036e-01 ... 1.2409373e-04\n",
" 1.1547816e-04 1.0746076e-04]\n",
" [ 9.0929741e-01 9.5814437e-01 9.8704624e-01 ... 2.4818745e-04\n",
" 2.3095631e-04 2.1492151e-04]\n",
" ...\n",
" [ 3.7960774e-01 7.4510968e-01 7.3418564e-01 ... 1.2036801e-02\n",
" 1.1201146e-02 1.0423505e-02]\n",
" [-5.7338190e-01 -8.9752287e-02 -4.1488394e-02 ... 1.2160885e-02\n",
" 1.1316618e-02 1.0530960e-02]\n",
" [-9.9920684e-01 -8.5234123e-01 -7.8794664e-01 ... 1.2284970e-02\n",
" 1.1432089e-02 1.0638415e-02]]\n",
"True\n",
"True\n"
]
}
],
"source": [
"import torch\n",
"import math\n",
"import numpy as np\n",
"\n",
"max_len=100\n",
"d_model=256\n",
"\n",
"pe = torch.zeros(max_len, d_model)\n",
"position = torch.arange(0, max_len,\n",
" dtype=torch.float32).unsqueeze(1)\n",
"toruch_position = position\n",
"div_term = torch.exp(\n",
" torch.arange(0, d_model, 2, dtype=torch.float32) *\n",
" -(math.log(10000.0) / d_model))\n",
"tourch_div_term = div_term.cpu().detach().numpy()\n",
"\n",
"\n",
"\n",
"torhc_sin = torch.sin(position * div_term)\n",
"torhc_cos = torch.cos(position * div_term)\n",
"print(torhc_sin.cpu().detach().numpy())\n",
"np_sin = np.sin((position * div_term).cpu().detach().numpy())\n",
"np_cos = np.cos((position * div_term).cpu().detach().numpy())\n",
"print(np.allclose(np_sin, torhc_sin.cpu().detach().numpy()))\n",
"print(np.allclose(np_cos, torhc_cos.cpu().detach().numpy()))\n",
"pe[:, 0::2] = torhc_sin\n",
"pe[:, 1::2] = torhc_cos\n",
"tourch_pe = pe.cpu().detach().numpy()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "swiss-referral",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n",
"True\n",
"False\n",
"False\n",
"False\n",
"False\n"
]
}
],
"source": [
"import paddle\n",
"ppe = paddle.zeros([max_len, d_model])\n",
"position = paddle.arange(0, max_len,\n",
" dtype='float32').unsqueeze(1)\n",
"print(np.allclose(position.numpy(), toruch_position))\n",
"div_term = paddle.exp(\n",
" paddle.arange(0, d_model, 2, dtype='float32') *\n",
" -(math.log(10000.0) / d_model))\n",
"print(np.allclose(div_term.numpy(), tourch_div_term))\n",
"\n",
"\n",
"\n",
"p_sin = paddle.sin(position * div_term)\n",
"p_cos = paddle.cos(position * div_term)\n",
"print(np.allclose(np_sin, p_sin.numpy(), rtol=1.e-6, atol=0))\n",
"print(np.allclose(np_cos, p_cos.numpy(), rtol=1.e-6, atol=0))\n",
"ppe[:, 0::2] = p_sin\n",
"ppe[:, 1::2] = p_cos\n",
"print(np.allclose(p_sin.numpy(), torhc_sin.cpu().detach().numpy()))\n",
"print(np.allclose(p_cos.numpy(), torhc_cos.cpu().detach().numpy()))"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "integrated-boards",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"False\n"
]
}
],
"source": [
"print(np.allclose(ppe.numpy(), pe.numpy()))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "flying-reserve",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "revised-divide",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
......@@ -100,7 +100,7 @@
"text": [
"/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
" and should_run_async(code)\n",
"[INFO 2021/04/19 06:57:01 u2.py:834] U2 Encoder type: conformer\n"
"[INFO 2021/04/20 03:32:21 u2.py:834] U2 Encoder type: conformer\n"
]
},
{
......@@ -1439,13 +1439,7 @@
"decoder.decoders.3.feed_forward.w_2.weight | [2048, 256] | 524288\n",
"decoder.decoders.3.feed_forward.w_2.bias | [256] | 256\n",
"decoder.decoders.3.norm1.weight | [256] | 256\n",
"decoder.decoders.3.norm1.bias | [256] | 256\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"decoder.decoders.3.norm1.bias | [256] | 256\n",
"decoder.decoders.3.norm2.weight | [256] | 256\n",
"decoder.decoders.3.norm2.bias | [256] | 256\n",
"decoder.decoders.3.norm3.weight | [256] | 256\n",
......@@ -1526,7 +1520,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"id": "ruled-invitation",
"metadata": {},
"outputs": [
......@@ -2184,6 +2178,16 @@
"print(model)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "fossil-means",
"metadata": {},
"outputs": [],
"source": [
"# load feat"
]
},
{
"cell_type": "code",
"execution_count": 7,
......@@ -2194,13 +2198,18 @@
"name": "stdout",
"output_type": "stream",
"text": [
"compute_cmvn_loader_test.ipynb jit_infer.ipynb\r\n",
"dataloader.ipynb mask_and_masked_fill_test.ipynb\r\n",
"dataloader_with_tokens_tokenids.ipynb model.npz\r\n",
"data.npz python_test.ipynb\r\n",
"decoder.npz train_test.ipynb\r\n",
"encoder.npz u2_model.ipynb\r\n",
"hack_api_test.ipynb\r\n"
"compute_cmvn_loader_test.ipynb encoder.npz\r\n",
"dataloader.ipynb hack_api_test.ipynb\r\n",
"dataloader_with_tokens_tokenids.ipynb jit_infer.ipynb\r\n",
"data.npz layer_norm_test.ipynb\r\n",
"decoder.npz Linear_test.ipynb\r\n",
"enc_0_ff_out.npz mask_and_masked_fill_test.ipynb\r\n",
"enc_0_norm_ff.npz model.npz\r\n",
"enc_0.npz position_embeding_check.ipynb\r\n",
"enc_0_selattn_out.npz python_test.ipynb\r\n",
"enc_2.npz train_test.ipynb\r\n",
"enc_all.npz u2_model.ipynb\r\n",
"enc_embed.npz\r\n"
]
}
],
......@@ -2213,21 +2222,6 @@
"execution_count": 8,
"id": "abroad-oracle",
"metadata": {},
"outputs": [],
"source": [
"data = np.load('.notebook/data.npz', allow_pickle=True)\n",
"keys=data['keys']\n",
"feat=data['feat']\n",
"feat_len=data['feat_len']\n",
"text=data['text']\n",
"text_len=data['text_len']"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "false-instrument",
"metadata": {},
"outputs": [
{
"name": "stdout",
......@@ -2311,6 +2305,12 @@
}
],
"source": [
"data = np.load('.notebook/data.npz', allow_pickle=True)\n",
"keys=data['keys']\n",
"feat=data['feat']\n",
"feat_len=data['feat_len']\n",
"text=data['text']\n",
"text_len=data['text_len']\n",
"print(keys)\n",
"print(feat.shape)\n",
"print(feat)\n",
......@@ -2321,7 +2321,15 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"id": "false-instrument",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 9,
"id": "arctic-proxy",
"metadata": {},
"outputs": [],
......@@ -2400,7 +2408,15 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": null,
"id": "seasonal-switch",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 10,
"id": "defined-brooks",
"metadata": {},
"outputs": [
......@@ -2408,17 +2424,23 @@
"name": "stdout",
"output_type": "stream",
"text": [
"compute_cmvn_loader_test.ipynb\t jit_infer.ipynb\r\n",
"dataloader.ipynb\t\t mask_and_masked_fill_test.ipynb\r\n",
"dataloader_with_tokens_tokenids.ipynb model.npz\r\n",
"data.npz\t\t\t python_test.ipynb\r\n",
"decoder.npz\t\t\t train_test.ipynb\r\n",
"encoder.npz\t\t\t u2_model.ipynb\r\n",
"hack_api_test.ipynb\r\n"
"compute_cmvn_loader_test.ipynb\t encoder.npz\r\n",
"dataloader.ipynb\t\t hack_api_test.ipynb\r\n",
"dataloader_with_tokens_tokenids.ipynb jit_infer.ipynb\r\n",
"data.npz\t\t\t layer_norm_test.ipynb\r\n",
"decoder.npz\t\t\t Linear_test.ipynb\r\n",
"enc_0_ff_out.npz\t\t mask_and_masked_fill_test.ipynb\r\n",
"enc_0_norm_ff.npz\t\t model.npz\r\n",
"enc_0.npz\t\t\t position_embeding_check.ipynb\r\n",
"enc_0_selattn_out.npz\t\t python_test.ipynb\r\n",
"enc_2.npz\t\t\t train_test.ipynb\r\n",
"enc_all.npz\t\t\t u2_model.ipynb\r\n",
"enc_embed.npz\r\n"
]
}
],
"source": [
"# load model param\n",
"!ls .notebook\n",
"data = np.load('.notebook/model.npz', allow_pickle=True)\n",
"state_dict = data['state'].item()\n",
......@@ -2445,7 +2467,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 11,
"id": "confident-piano",
"metadata": {},
"outputs": [
......@@ -2478,6 +2500,7 @@
}
],
"source": [
"# compute loss\n",
"import paddle\n",
"feat=paddle.to_tensor(feat)\n",
"feat_len=paddle.to_tensor(feat_len, dtype='int64')\n",
......@@ -2492,12 +2515,15 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 12,
"id": "better-senator",
"metadata": {},
"outputs": [],
"source": [
"# tensor(142.4858, device='cuda:0', grad_fn=<AddBackward0>) tensor(41.8416, device='cuda:0', grad_fn=<DivBackward0>) tensor(377.3222, device='cuda:0', grad_fn=<DivBackward0>)"
"# tensor(142.4888, device='cuda:0', grad_fn=<AddBackward0>) \n",
"# tensor(41.8415, device='cuda:0', grad_fn=<DivBackward0>) \n",
"# tensor(377.3326, device='cuda:0', grad_fn=<DivBackward0>)\n",
"# 142.4888 41.84146 377.33258"
]
},
{
......@@ -2510,7 +2536,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 13,
"id": "olympic-problem",
"metadata": {},
"outputs": [
......@@ -2532,23 +2558,16 @@
}
],
"source": [
"# encoder\n",
"encoder_out, encoder_mask = model.encoder(feat, feat_len)\n",
"print(encoder_out.shape)\n",
"print(encoder_mask.shape)\n",
"print(encoder_out[0])\n"
"print(encoder_out[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cubic-values",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 14,
"id": "shaped-alaska",
"metadata": {},
"outputs": [
......@@ -2571,7 +2590,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 15,
"id": "federal-rover",
"metadata": {},
"outputs": [
......@@ -2589,7 +2608,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 16,
"id": "regulated-interstate",
"metadata": {},
"outputs": [
......@@ -2610,18 +2629,38 @@
" [-1.165412 0.6819976 0.69394535 ... 1.2238353 0.80282927\n",
" 1.4506509 ]\n",
" [-1.2732087 0.71458083 0.7581961 ... 0.9415482 0.877484\n",
" 1.2623053 ]]\n"
" 1.2623053 ]]\n",
"----\n",
"[[-0.7019418 0.56254166 0.6880346 ... 1.1237322 0.78039235\n",
" 1.1369387 ]\n",
" [-0.7787781 0.39126658 0.71887815 ... 1.2518822 0.8861679\n",
" 1.3173453 ]\n",
" [-0.95908946 0.6346025 0.87671334 ... 0.9818373 0.7440108\n",
" 1.2903266 ]\n",
" ...\n",
" [-1.073225 0.67236906 0.9230311 ... 0.9075456 0.81767166\n",
" 1.3239657 ]\n",
" [-1.1654116 0.68199694 0.69394493 ... 1.2238349 0.8028289\n",
" 1.4506508 ]\n",
" [-1.2732095 0.7145803 0.7581956 ... 0.9415491 0.87748396\n",
" 1.2623051 ]]\n",
"True\n",
"False\n"
]
}
],
"source": [
"print(np.allclose(torch_encoder_out, encoder_out.numpy()))\n",
"print(torch_encoder_out[0])"
"print(torch_encoder_out[0])\n",
"print(\"----\")\n",
"print(encoder_out.numpy()[0])\n",
"print(np.allclose(torch_encoder_out, encoder_out.numpy(), atol=1e-5, rtol=1e-6))\n",
"print(np.allclose(torch_encoder_out, encoder_out.numpy(), atol=1e-6, rtol=1e-6))"
]
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 17,
"id": "proof-scheduling",
"metadata": {},
"outputs": [
......@@ -2630,23 +2669,23 @@
"output_type": "stream",
"text": [
"Tensor(shape=[1], dtype=float32, place=CUDAPlace(0), stop_gradient=False,\n",
" [377.32220459])\n",
" [377.33258057])\n",
"[1.]\n",
"[[ 3.1708076e+00 -1.5184805e-02 4.9524564e-02 ... -2.4678309e-03\n",
" -5.9236852e-03 -7.2192554e-03]\n",
" [-1.7474542e+00 7.7654729e-03 -4.5106117e-02 ... 9.8463835e-04\n",
" 2.4569160e-03 2.2863639e-03]\n",
" [-2.3707268e+00 1.3136451e-02 -2.6281785e-02 ... 2.2738585e-03\n",
" 5.7726162e-03 7.4628354e-03]\n",
"[[ 3.16902876e+00 -1.51763987e-02 4.91095744e-02 ... -2.47971853e-03\n",
" -5.93360700e-03 -7.26609165e-03]\n",
" [-1.74184477e+00 7.75874173e-03 -4.49434854e-02 ... 9.92412097e-04\n",
" 2.46337592e-03 2.31892057e-03]\n",
" [-2.33343339e+00 1.30475955e-02 -2.66557075e-02 ... 2.27532350e-03\n",
" 5.76924905e-03 7.48788286e-03]\n",
" ...\n",
" [-4.4350743e+00 2.4916438e-02 -9.0385124e-02 ... 4.4534383e-03\n",
" 1.1696636e-02 1.4515720e-02]\n",
" [-3.3899918e+00 1.7287316e-02 -6.3514955e-02 ... 3.2612216e-03\n",
" 8.5411733e-03 1.0692922e-02]\n",
" [-6.6964636e+00 3.5097409e-02 -1.2437013e-01 ... 6.3515711e-03\n",
" 1.6078018e-02 2.0318989e-02]]\n",
"[-4.4341431e+00 2.3347888e-02 -9.3501516e-02 ... 4.2512305e-03\n",
" 1.0928102e-02 1.3750527e-02]\n"
" [-4.30358458e+00 2.46054661e-02 -9.00950655e-02 ... 4.43156436e-03\n",
" 1.16122244e-02 1.44715561e-02]\n",
" [-3.36921120e+00 1.73153952e-02 -6.36872873e-02 ... 3.28363618e-03\n",
" 8.58010259e-03 1.07794888e-02]\n",
" [-6.62045336e+00 3.49955931e-02 -1.23962618e-01 ... 6.36671018e-03\n",
" 1.60814095e-02 2.03891303e-02]]\n",
"[-4.3777819e+00 2.3245810e-02 -9.3339294e-02 ... 4.2569344e-03\n",
" 1.0919910e-02 1.3787797e-02]\n"
]
}
],
......@@ -2679,23 +2718,25 @@
"print(loss_ctc.grad)\n",
"print(model.ctc.ctc_lo.weight.grad)\n",
"print(model.ctc.ctc_lo.bias.grad)\n",
"# tensor(377.3222, device='cuda:0', grad_fn=<DivBackward0>)\n",
"\n",
"\n",
"# tensor(377.3326, device='cuda:0', grad_fn=<DivBackward0>)\n",
"# None\n",
"# tensor([[ 3.1708e+00, -1.7475e+00, -2.3708e+00, ..., -4.4351e+00,\n",
"# -3.3900e+00, -6.6965e+00],\n",
"# [-1.5185e-02, 7.7655e-03, 1.3137e-02, ..., 2.4917e-02,\n",
"# 1.7287e-02, 3.5098e-02],\n",
"# [ 4.9522e-02, -4.5104e-02, -2.6280e-02, ..., -9.0381e-02,\n",
"# -6.3512e-02, -1.2436e-01],\n",
"# ...,\n",
"# [-2.4678e-03, 9.8464e-04, 2.2739e-03, ..., 4.4535e-03,\n",
"# 3.2612e-03, 6.3516e-03],\n",
"# [-5.9237e-03, 2.4569e-03, 5.7726e-03, ..., 1.1697e-02,\n",
"# 8.5412e-03, 1.6078e-02],\n",
"# [-7.2193e-03, 2.2864e-03, 7.4629e-03, ..., 1.4516e-02,\n",
"# 1.0693e-02, 2.0319e-02]], device='cuda:0')\n",
"# tensor([-4.4342e+00, 2.3348e-02, -9.3497e-02, ..., 4.2513e-03,\n",
"# 1.0928e-02, 1.3751e-02], device='cuda:0')"
"# [[ 3.16902351e+00 -1.51765049e-02 4.91097234e-02 ... -2.47973716e-03\n",
"# -5.93366381e-03 -7.26613170e-03]\n",
"# [-1.74185038e+00 7.75875803e-03 -4.49435972e-02 ... 9.92415240e-04\n",
"# 2.46338220e-03 2.31891591e-03]\n",
"# [-2.33343077e+00 1.30476682e-02 -2.66557615e-02 ... 2.27533933e-03\n",
"# 5.76929189e-03 7.48792710e-03]\n",
"# ...\n",
"# [-4.30356789e+00 2.46056803e-02 -9.00955945e-02 ... 4.43160534e-03\n",
"# 1.16123557e-02 1.44716976e-02]\n",
"# [-3.36919212e+00 1.73155665e-02 -6.36875406e-02 ... 3.28367390e-03\n",
"# 8.58021621e-03 1.07796099e-02]\n",
"# [-6.62039661e+00 3.49958315e-02 -1.23963736e-01 ... 6.36674836e-03\n",
"# 1.60815325e-02 2.03892551e-02]]\n",
"# [-4.3777566e+00 2.3245990e-02 -9.3339972e-02 ... 4.2569702e-03\n",
"# 1.0920014e-02 1.3787906e-02]"
]
},
{
......@@ -2708,7 +2749,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 18,
"id": "synthetic-hungarian",
"metadata": {},
"outputs": [
......@@ -2717,7 +2758,7 @@
"output_type": "stream",
"text": [
"Tensor(shape=[1], dtype=float32, place=CUDAPlace(0), stop_gradient=False,\n",
" [41.84160995]) 0.0\n"
" [41.84146118]) 0.0\n"
]
}
],
......@@ -2730,17 +2771,15 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 19,
"id": "indian-sweden",
"metadata": {},
"outputs": [],
"source": [
"# encoder, decoder不对齐"
]
"source": []
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 202,
"id": "marine-cuisine",
"metadata": {},
"outputs": [
......@@ -2772,7 +2811,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 180,
"id": "several-result",
"metadata": {},
"outputs": [],
......@@ -2833,7 +2872,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 181,
"id": "possible-bulgaria",
"metadata": {},
"outputs": [
......@@ -2890,7 +2929,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 285,
"id": "north-walter",
"metadata": {},
"outputs": [
......@@ -2898,25 +2937,49 @@
"name": "stdout",
"output_type": "stream",
"text": [
"[16, 7, 4233]\n",
"Tensor(shape=[7, 4233], dtype=float32, place=CUDAPlace(0), stop_gradient=False,\n",
" [[-0.37638962, -0.82272029, 0.74276292, ..., 0.34200522, 0.01503509, 0.40337229],\n",
" [-0.87386417, -0.31389427, 0.41987872, ..., 0.37723723, -0.14352795, -1.00236630],\n",
" [-0.43505096, 0.03450463, -0.28710306, ..., 0.07727426, -1.16722453, -0.26848495],\n",
" ...,\n",
" [ 0.42471474, 0.58885634, 0.02020410, ..., 0.37405482, 0.04546990, -0.37139422],\n",
" [-0.37978464, -0.81084198, 0.75725073, ..., 0.26038912, -0.00079346, 0.42537683],\n",
" [-0.38279879, -0.81206709, 0.74943423, ..., 0.26172996, -0.00104988, 0.42678767]])\n",
"False\n"
"False\n",
"True\n",
"False\n",
"[[-3.76389682e-01 -8.22720408e-01 7.42762923e-01 ... 3.42005253e-01\n",
" 1.50350705e-02 4.03372347e-01]\n",
" [-8.73864174e-01 -3.13894272e-01 4.19878662e-01 ... 3.77237231e-01\n",
" -1.43528014e-01 -1.00236630e+00]\n",
" [-4.35050905e-01 3.45046446e-02 -2.87102997e-01 ... 7.72742853e-02\n",
" -1.16722476e+00 -2.68485069e-01]\n",
" ...\n",
" [ 4.24714804e-01 5.88856399e-01 2.02039629e-02 ... 3.74054879e-01\n",
" 4.54700664e-02 -3.71394157e-01]\n",
" [-3.79784584e-01 -8.10841978e-01 7.57250786e-01 ... 2.60389000e-01\n",
" -7.93404877e-04 4.25376773e-01]\n",
" [-3.82798851e-01 -8.12067091e-01 7.49434292e-01 ... 2.61730075e-01\n",
" -1.04988366e-03 4.26787734e-01]]\n",
"---\n",
"[[-3.7638968e-01 -8.2272053e-01 7.4276292e-01 ... 3.4200522e-01\n",
" 1.5034772e-02 4.0337229e-01]\n",
" [-8.7386459e-01 -3.1389427e-01 4.1987866e-01 ... 3.7723729e-01\n",
" -1.4352810e-01 -1.0023664e+00]\n",
" [-4.3505096e-01 3.4504786e-02 -2.8710306e-01 ... 7.7274129e-02\n",
" -1.1672243e+00 -2.6848501e-01]\n",
" ...\n",
" [ 4.2471480e-01 5.8885634e-01 2.0203922e-02 ... 3.7405500e-01\n",
" 4.5470044e-02 -3.7139410e-01]\n",
" [-3.7978446e-01 -8.1084180e-01 7.5725085e-01 ... 2.6038891e-01\n",
" -7.9347193e-04 4.2537671e-01]\n",
" [-3.8279903e-01 -8.1206715e-01 7.4943429e-01 ... 2.6173013e-01\n",
" -1.0499060e-03 4.2678756e-01]]\n"
]
}
],
"source": [
"decoder_out, _ = model.decoder(encoder_out, encoder_mask, ys_in_pad,\n",
" ys_in_lens)\n",
"print(decoder_out.shape)\n",
"print(decoder_out[0])\n",
"print(np.allclose(decoder_out.numpy(), torch_decoder_out))"
"\n",
"print(np.allclose(decoder_out.numpy(), torch_decoder_out))\n",
"print(np.allclose(decoder_out.numpy(), torch_decoder_out, atol=1e-6))\n",
"print(np.allclose(decoder_out.numpy(), torch_decoder_out, atol=1e-7))\n",
"print(decoder_out.numpy()[0])\n",
"print('---')\n",
"print(torch_decoder_out[0])"
]
},
{
......@@ -2945,13 +3008,15 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 183,
"id": "assisted-fortune",
"metadata": {},
"outputs": [],
"source": [
"from paddle import nn\n",
"import paddle\n",
"from paddle.nn import functional as F\n",
"\n",
"class LabelSmoothingLoss(nn.Layer):\n",
"\n",
" def __init__(self,\n",
......@@ -3016,7 +3081,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 184,
"id": "weighted-delight",
"metadata": {},
"outputs": [
......@@ -3034,7 +3099,7 @@
" [0.00002363, 0.00002363, 0.00002363, ..., 0.00002363, 0.00002363, 0.00002363],\n",
" [0.00002363, 0.00002363, 0.00002363, ..., 0.00002363, 0.00002363, 0.00002363]])\n",
"Tensor(shape=[1], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
" [41.84160995])\n",
" [41.84146118])\n",
"VarType.INT64\n"
]
}
......@@ -3049,7 +3114,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 286,
"id": "dress-shelter",
"metadata": {},
"outputs": [
......@@ -3058,7 +3123,7 @@
"output_type": "stream",
"text": [
"Tensor(shape=[1], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
" [41.84160995])\n",
" [41.84146118])\n",
"Tensor(shape=[1], dtype=float32, place=CUDAPlace(0), stop_gradient=False,\n",
" [41.84146118])\n",
"4233\n",
......@@ -3094,7 +3159,39 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": null,
"id": "going-hungary",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "naughty-citizenship",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "experimental-emerald",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "adverse-saskatchewan",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 27,
"id": "speaking-shelf",
"metadata": {},
"outputs": [],
......@@ -3261,7 +3358,7 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 28,
"id": "sharp-municipality",
"metadata": {},
"outputs": [],
......@@ -3351,7 +3448,7 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 29,
"id": "tutorial-syndication",
"metadata": {},
"outputs": [],
......@@ -3377,7 +3474,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 30,
"id": "fuzzy-register",
"metadata": {},
"outputs": [
......@@ -3397,7 +3494,55 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": null,
"id": "explicit-triumph",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "humanitarian-belgium",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "dying-proposal",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "honest-quick",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "bound-cholesterol",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "viral-packaging",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 203,
"id": "balanced-locator",
"metadata": {},
"outputs": [
......@@ -3431,7 +3576,7 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 204,
"id": "induced-proposition",
"metadata": {},
"outputs": [
......@@ -3499,7 +3644,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 205,
"id": "cutting-julian",
"metadata": {},
"outputs": [
......@@ -3833,7 +3978,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 206,
"id": "friendly-nightlife",
"metadata": {},
"outputs": [
......@@ -3940,7 +4085,15 @@
" ...,\n",
" [-5.08208990, 8.59203339, -4.21366739, ..., 6.26925707, 0.05394945, -2.92699170],\n",
" [-5.08208990, 8.59203339, -4.21366739, ..., 6.26925707, 0.05394945, -2.92699170],\n",
" [-5.08208990, 8.59203339, -4.21366739, ..., 6.26925707, 0.05394945, -2.92699170]]])\n"
" [-5.08208990, 8.59203339, -4.21366739, ..., 6.26925707, 0.05394945, -2.92699170]]])\n",
"Tensor(shape=[1, 51, 256], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
" [[[ 0. , 1. , 0. , ..., 1. , 0. , 1. ],\n",
" [ 0.84147102, 0.54030228, 0.80196184, ..., 1. , 0.00010746, 1. ],\n",
" [ 0.90929747, -0.41614681, 0.95814437, ..., 1. , 0.00021492, 1. ],\n",
" ...,\n",
" [-0.76825470, -0.64014435, 0.63279730, ..., 0.99998462, 0.00515809, 0.99998671],\n",
" [-0.95375264, 0.30059254, 0.99899054, ..., 0.99998397, 0.00526555, 0.99998611],\n",
" [-0.26237485, 0.96496606, 0.56074661, ..., 0.99998331, 0.00537301, 0.99998558]]])\n"
]
}
],
......@@ -3949,69 +4102,106 @@
"x = model.encoder.embed.out(x.transpose([0, 2, 1, 3]).reshape([b, t, c * f]))\n",
"print(x)\n",
"x, pos_emb = model.encoder.embed.pos_enc(x, 0)\n",
"print(x)"
"print(x)\n",
"print(pos_emb)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "exempt-cloud",
"execution_count": 207,
"id": "guilty-cache",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tensor(shape=[16, 51, 256], dtype=float32, place=CUDAPlace(0), stop_gradient=False,\n",
" [[[-0.54821998, 2.28660274, -1.07501972, ..., 1.45036042, 0.28950194, -0.69454080],\n",
" [-0.80125421, 1.76875579, -1.66388774, ..., 1.83315802, 0.67914939, -0.19995420],\n",
" [-1.71124649, 2.70574546, -1.33634126, ..., 1.23364413, 0.18697014, -0.57351983],\n",
" ...,\n",
" [-0.96968573, 2.31294894, -0.87524825, ..., 0.85838526, 0.48533469, -0.41773027],\n",
" [-1.36094308, 2.17788029, -1.78127730, ..., 2.09278774, 0.25282228, -0.36496443],\n",
" [-1.69674826, 2.35438418, -1.74168527, ..., 1.36695099, 0.59511113, -0.74147725]],\n",
"\n",
" [[-1.98284078, 2.31777000, -0.90785271, ..., 0.41170627, 0.50061619, 0.08721463],\n",
" [-0.76404583, 1.35577726, -1.36125672, ..., 0.73170459, 0.67842603, 0.16851945],\n",
" [-0.95044655, 1.60376561, -1.30299675, ..., 0.57544005, 0.26769355, 0.33433008],\n",
" ...,\n",
" [-1.47567701, 2.53171301, -1.23207152, ..., 1.29967308, 0.50191855, -0.10343577],\n",
" [-1.17308092, 2.31722355, -1.25421047, ..., 1.73911047, 0.21709818, -0.44447583],\n",
" [-1.26996231, 3.22289634, -0.88719147, ..., 1.64605021, 0.09731755, -0.76786882]],\n",
"\n",
" [[-0.58725590, 1.42905438, -1.39500988, ..., 0.21024795, 0.10272825, 0.09179455],\n",
" [ 0.17428070, 1.78342295, -1.64217877, ..., 0.81127012, 0.31371105, 0.56344515],\n",
" [-0.34916472, 1.83103430, -1.06851172, ..., 0.69243336, 0.13782299, 0.45937473],\n",
" ...,\n",
" [-1.08686376, 2.30020404, -1.26384079, ..., 1.79982817, 0.51338923, -0.52227837],\n",
" [-1.26144814, 2.72396612, -1.37337780, ..., 1.44453299, 0.57420933, -0.33201432],\n",
" [-2.20676827, 4.34621811, -3.82886696, ..., 2.14260173, 1.20336640, -1.37951219]],\n",
"\n",
" ...,\n",
"\n",
" [[-0.39141566, 1.85533464, -0.57471782, ..., 1.00623512, 0.46320182, -1.04523599],\n",
" [-0.86054784, 2.01717925, -1.44368529, ..., 1.45262301, 0.16571884, 0.59231722],\n",
" [-0.73066384, 2.28405023, -1.06989920, ..., 1.58249414, -0.09795550, 0.55030036],\n",
"Tensor(shape=[1, 51, 256], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
" [[[ 0. , 1. , 0. , ..., 1. , 0. , 1. ],\n",
" [ 0.84147102, 0.54030228, 0.80196184, ..., 1. , 0.00010746, 1. ],\n",
" [ 0.90929747, -0.41614681, 0.95814437, ..., 1. , 0.00021492, 1. ],\n",
" ...,\n",
" [-5.08208990, 8.59203339, -4.21366739, ..., 6.26925707, 0.05394945, -2.92699170],\n",
" [-5.08208990, 8.59203339, -4.21366739, ..., 6.26925707, 0.05394945, -2.92699170],\n",
" [-5.08208990, 8.59203339, -4.21366739, ..., 6.26925707, 0.05394945, -2.92699170]],\n",
"\n",
" [[-0.16194311, 0.62550521, -1.13234293, ..., 0.07242929, -0.22042468, 0.46362036],\n",
" [-0.08306468, 0.57504302, -1.09298003, ..., 0.91096652, -0.06501988, 0.72986233],\n",
" [-0.28202093, 0.08014385, -0.94177192, ..., 0.33794850, -0.11664233, 0.44514441],\n",
" ...,\n",
" [-5.08208990, 8.59203339, -4.21366739, ..., 6.26925707, 0.05394945, -2.92699170],\n",
" [-5.08208990, 8.59203339, -4.21366739, ..., 6.26925707, 0.05394945, -2.92699170],\n",
" [-5.08208990, 8.59203339, -4.21366739, ..., 6.26925707, 0.05394945, -2.92699170]],\n",
"\n",
" [[-0.54584920, -0.69092435, -1.35965478, ..., -0.78182435, 0.68747747, 0.98427159],\n",
" [ 0.04212743, -1.10618520, -1.43891501, ..., -0.02385022, 0.91146135, 0.52870303],\n",
" [-0.29093450, -0.18858244, -1.54873240, ..., -0.13923697, 0.05795169, 0.30663735],\n",
" ...,\n",
" [-5.08208990, 8.59203339, -4.21366739, ..., 6.26925707, 0.05394945, -2.92699170],\n",
" [-5.08208990, 8.59203339, -4.21366739, ..., 6.26925707, 0.05394945, -2.92699170],\n",
" [-5.08208990, 8.59203339, -4.21366739, ..., 6.26925707, 0.05394945, -2.92699170]]])\n"
" [-0.76825470, -0.64014435, 0.63279730, ..., 0.99998462, 0.00515809, 0.99998671],\n",
" [-0.95375264, 0.30059254, 0.99899054, ..., 0.99998397, 0.00526555, 0.99998611],\n",
" [-0.26237485, 0.96496606, 0.56074661, ..., 0.99998331, 0.00537301, 0.99998558]]])\n"
]
}
],
"source": [
"print(pos_emb)"
]
},
{
"cell_type": "code",
"execution_count": 208,
"id": "iraqi-payday",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[[ 0.0000000e+00 1.0000000e+00 0.0000000e+00 ... 1.0000000e+00\n",
" 0.0000000e+00 1.0000000e+00]\n",
" [ 8.4147096e-01 5.4030234e-01 8.0196178e-01 ... 1.0000000e+00\n",
" 1.0746076e-04 1.0000000e+00]\n",
" [ 9.0929741e-01 -4.1614684e-01 9.5814437e-01 ... 1.0000000e+00\n",
" 2.1492151e-04 1.0000000e+00]\n",
" ...\n",
" [ 9.5625257e-01 -2.9254240e-01 4.8925215e-01 ... 8.3807874e-01\n",
" 5.1154459e-01 8.5925674e-01]\n",
" [ 2.7049953e-01 -9.6272010e-01 9.9170387e-01 ... 8.3801574e-01\n",
" 5.1163691e-01 8.5920173e-01]\n",
" [-6.6394955e-01 -7.4777740e-01 6.9544029e-01 ... 8.3795273e-01\n",
" 5.1172924e-01 8.5914677e-01]]]\n",
"[1, 5000, 256]\n"
]
}
],
"source": [
"import torch\n",
"import math\n",
"import numpy as np\n",
"\n",
"max_len=5000\n",
"d_model=256\n",
"\n",
"pe = torch.zeros(max_len, d_model)\n",
"position = torch.arange(0, max_len,\n",
" dtype=torch.float32).unsqueeze(1)\n",
"toruch_position = position\n",
"div_term = torch.exp(\n",
" torch.arange(0, d_model, 2, dtype=torch.float32) *\n",
" -(math.log(10000.0) / d_model))\n",
"tourch_div_term = div_term.cpu().detach().numpy()\n",
"\n",
"torhc_sin = torch.sin(position * div_term)\n",
"torhc_cos = torch.cos(position * div_term)\n",
"\n",
"np_sin = np.sin((position * div_term).cpu().detach().numpy())\n",
"np_cos = np.cos((position * div_term).cpu().detach().numpy())\n",
"pe[:, 0::2] = torhc_sin\n",
"pe[:, 1::2] = torhc_cos\n",
"pe = pe.unsqueeze(0) \n",
"tourch_pe = pe.cpu().detach().numpy()\n",
"print(tourch_pe)\n",
"bak_pe = model.encoder.embed.pos_enc.pe\n",
"print(bak_pe.shape)\n",
"model.encoder.embed.pos_enc.pe = paddle.to_tensor(tourch_pe)"
]
},
{
"cell_type": "code",
"execution_count": 210,
"id": "exempt-cloud",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n",
"True\n"
]
}
],
......@@ -4020,7 +4210,12 @@
"masks = make_non_pad_mask(feat_len).unsqueeze(1)\n",
"\n",
"xs, pos_emb, masks = model.encoder.embed(xs, masks.type_as(xs), offset=0)\n",
"print(xs)"
"#print(xs)\n",
"data = np.load(\".notebook/enc_embed.npz\")\n",
"torch_pos_emb=data['pos_emb']\n",
"torch_xs = data['embed_out']\n",
"print(np.allclose(xs.numpy(), torch_xs))\n",
"print(np.allclose(pos_emb.numpy(), torch_pos_emb))"
]
},
{
......@@ -4029,45 +4224,361 @@
"id": "composite-involvement",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 269,
"id": "handed-harris",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"False\n",
"True\n",
"[256, 2048]\n",
"[2048]\n",
"[2048, 256]\n",
"[256]\n",
"--------ff-------\n",
"True\n",
"False\n",
"False\n",
"False\n",
"False\n",
"True\n",
"linear_714.w_0 True\n",
"linear_714.b_0 True\n",
"linear_715.w_0 True\n",
"linear_715.b_0 True\n",
"False\n",
"True\n"
]
}
],
"source": [
"xs = model.encoder.global_cmvn(feat)\n",
"masks = make_non_pad_mask(feat_len).unsqueeze(1)\n",
"\n",
"xs, pos_emb, masks = model.encoder.embed(xs, masks.type_as(xs), offset=0)\n",
"masks = masks.astype(paddle.bool)\n",
"mask_pad = masks.logical_not()\n",
"decoding_chunk_size=0\n",
"num_decoding_left_chunks=-1\n",
"chunk_masks = add_optional_chunk_mask(\n",
" xs, masks, model.encoder.use_dynamic_chunk, model.encoder.use_dynamic_left_chunk,\n",
" decoding_chunk_size, model.encoder.static_chunk_size,\n",
" num_decoding_left_chunks)\n",
"\n",
"#print(chunk_masks)\n",
"data = np.load(\".notebook/enc_embed.npz\")\n",
"torch_pos_emb=data['pos_emb']\n",
"torch_xs = data['embed_out']\n",
"torch_chunk_masks = data['chunk_masks']\n",
"torch_mask_pad = data['mask_pad']\n",
"print(np.allclose(xs.numpy(), torch_xs))\n",
"print(np.allclose(pos_emb.numpy(), torch_pos_emb))\n",
"np.testing.assert_equal(chunk_masks.numpy(), torch_chunk_masks)\n",
"np.testing.assert_equal(mask_pad.numpy(), ~torch_mask_pad)\n",
"\n",
"for layer in model.encoder.encoders:\n",
" #xs, chunk_masks, _ = layer(xs, chunk_masks, pos_emb, mask_pad)\n",
" print(layer.feed_forward_macaron is not None)\n",
" print(layer.normalize_before)\n",
" \n",
" data = np.load('.notebook/enc_0_norm_ff.npz')\n",
" t_norm_ff = data['norm_ff']\n",
" t_xs = data['xs']\n",
" \n",
" \n",
" x = xs\n",
" print(np.allclose(t_xs, x.numpy()))\n",
" residual = x\n",
" print(np.allclose(t_xs, residual.numpy()))\n",
" x_nrom = layer.norm_ff_macaron(x)\n",
" print(np.allclose(t.numpy(), x_nrom.numpy()))\n",
" print(np.allclose(t_norm_ff, x_nrom.numpy()))\n",
"# for n, p in layer.norm_ff_macaron.state_dict().items():\n",
"# print(n, p)\n",
"# pass\n",
"\n",
" layer.eval()\n",
" x_nrom = paddle.to_tensor(t_norm_ff)\n",
" print(np.allclose(t_norm_ff, x_nrom.numpy()))\n",
" x = residual + layer.ff_scale * layer.feed_forward_macaron(x_nrom)\n",
" \n",
" ps=[]\n",
" for n, p in layer.feed_forward_macaron.state_dict().items():\n",
" #print(n, p)\n",
" ps.append(p)\n",
" print(p.shape)\n",
" pass\n",
"\n",
" x_nrom = paddle.to_tensor(t_norm_ff)\n",
" ff_l_x = layer.feed_forward_macaron.w_1(x_nrom)\n",
" ff_l_a_x = layer.feed_forward_macaron.activation(ff_l_x)\n",
" ff_l_a_l_x = layer.feed_forward_macaron.w_2(ff_l_a_x)\n",
" data = np.load('.notebook/enc_0_ff_out.npz', allow_pickle=True)\n",
" t_norm_ff = data['norm_ff']\n",
" t_ff_out = data['ff_out']\n",
" t_ff_l_x = data['ff_l_x']\n",
" t_ff_l_a_x = data['ff_l_a_x']\n",
" t_ff_l_a_l_x = data['ff_l_a_l_x']\n",
" t_ps = data['ps']\n",
" \n",
" print(\"--------ff-------\")\n",
" print(np.allclose(x_nrom.numpy(), t_norm_ff))\n",
" print(np.allclose(x.numpy(), t_ff_out))\n",
" print(np.allclose(ff_l_x.numpy(), t_ff_l_x))\n",
" print(np.allclose(ff_l_a_x.numpy(), t_ff_l_a_x))\n",
" print(np.allclose(ff_l_a_l_x.numpy(), t_ff_l_a_l_x))\n",
" \n",
" print(np.allclose(ff_l_x.numpy(), t_ff_l_x, atol=1e-6))\n",
" for p, t_p in zip(ps, t_ps):\n",
" print(p.name, np.allclose(p.numpy(), t_p.T))\n",
" \n",
" \n",
"# residual = x\n",
"# x = layer.norm_mha(x)\n",
"# x_q = x\n",
" \n",
" data = np.load('.notebook/enc_0_selattn_out.npz', allow_pickle=True)\n",
" tx_q = data['x_q']\n",
" tx = data['x']\n",
" tpos_emb=data['pos_emb']\n",
" tmask=data['mask']\n",
" tt_x_att=data['x_att']\n",
" x_q = paddle.to_tensor(tx_q)\n",
" x = paddle.to_tensor(tx)\n",
" pos_emb = paddle.to_tensor(tpos_emb)\n",
" mask = paddle.to_tensor(tmask)\n",
" \n",
" x_att = layer.self_attn(x_q, x, x, pos_emb, mask)\n",
" print(np.allclose(x_att.numpy(), t_x_att))\n",
" print(np.allclose(x_att.numpy(), t_x_att, atol=1e-6))\n",
" \n",
" break"
]
},
{
"cell_type": "code",
"execution_count": 270,
"id": "sonic-thumb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n",
"True\n",
"False\n",
"True\n"
]
}
],
"source": [
"\n"
"xs = model.encoder.global_cmvn(feat)\n",
"masks = make_non_pad_mask(feat_len).unsqueeze(1)\n",
"\n",
"xs, pos_emb, masks = model.encoder.embed(xs, masks.type_as(xs), offset=0)\n",
"masks = masks.astype(paddle.bool)\n",
"mask_pad = masks.logical_not()\n",
"decoding_chunk_size=0\n",
"num_decoding_left_chunks=-1\n",
"chunk_masks = add_optional_chunk_mask(\n",
" xs, masks, model.encoder.use_dynamic_chunk, model.encoder.use_dynamic_left_chunk,\n",
" decoding_chunk_size, model.encoder.static_chunk_size,\n",
" num_decoding_left_chunks)\n",
"\n",
"#print(chunk_masks)\n",
"data = np.load(\".notebook/enc_embed.npz\")\n",
"torch_pos_emb=data['pos_emb']\n",
"torch_xs = data['embed_out']\n",
"torch_chunk_masks = data['chunk_masks']\n",
"torch_mask_pad = data['mask_pad']\n",
"print(np.allclose(xs.numpy(), torch_xs))\n",
"print(np.allclose(pos_emb.numpy(), torch_pos_emb))\n",
"np.testing.assert_equal(chunk_masks.numpy(), torch_chunk_masks)\n",
"np.testing.assert_equal(mask_pad.numpy(), ~torch_mask_pad)\n",
"\n",
"\n",
"for layer in model.encoder.encoders:\n",
" xs, chunk_masks, _ = layer(xs, chunk_masks, pos_emb, mask_pad)\n",
" break\n",
"data = np.load('.notebook/enc_0.npz')\n",
"torch_xs = data['enc_0']\n",
"print(np.allclose(xs.numpy(), torch_xs))\n",
"print(np.allclose(xs.numpy(), torch_xs, atol=1e-6))\n"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "handed-harris",
"execution_count": 273,
"id": "brave-latino",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n",
"True\n",
"--------layers_______\n",
"False\n",
"True\n",
"[[-0.70194244 0.56254214 0.6880346 ... 1.1237319 0.7803924\n",
" 1.1369387 ]\n",
" [-0.7787783 0.3912667 0.71887773 ... 1.251882 0.886168\n",
" 1.3173451 ]\n",
" [-0.95908964 0.6346029 0.87671334 ... 0.98183745 0.7440111\n",
" 1.2903278 ]\n",
" ...\n",
" [-1.0732255 0.67236906 0.92303115 ... 0.9075458 0.8176712\n",
" 1.3239655 ]\n",
" [-1.1654118 0.6819967 0.6939453 ... 1.2238353 0.8028295\n",
" 1.4506507 ]\n",
" [-1.2732092 0.7145806 0.75819594 ... 0.94154835 0.8774845\n",
" 1.2623049 ]]\n",
"xxxxxx\n",
"[[-0.7019424 0.56254166 0.6880345 ... 1.1237322 0.78039217\n",
" 1.1369387 ]\n",
" [-0.778778 0.39126638 0.7188779 ... 1.2518823 0.8861681\n",
" 1.3173454 ]\n",
" [-0.9590891 0.6346026 0.87671363 ... 0.9818373 0.74401116\n",
" 1.2903274 ]\n",
" ...\n",
" [-1.0732253 0.6723689 0.9230311 ... 0.9075457 0.8176713\n",
" 1.3239657 ]\n",
" [-1.165412 0.6819976 0.69394535 ... 1.2238353 0.80282927\n",
" 1.4506509 ]\n",
" [-1.273209 0.71458095 0.75819623 ... 0.9415484 0.8774842\n",
" 1.2623055 ]]\n"
]
}
],
"source": [
"xs = model.encoder.global_cmvn(feat)\n",
"masks = make_non_pad_mask(feat_len).unsqueeze(1)\n",
"\n",
"xs, pos_emb, masks = model.encoder.embed(xs, masks.type_as(xs), offset=0)\n",
"masks = masks.astype(paddle.bool)\n",
"mask_pad = masks.logical_not()\n",
"decoding_chunk_size=0\n",
"num_decoding_left_chunks=-1\n",
"chunk_masks = add_optional_chunk_mask(\n",
" xs, masks, model.encoder.use_dynamic_chunk, model.encoder.use_dynamic_left_chunk,\n",
" decoding_chunk_size, model.encoder.static_chunk_size,\n",
" num_decoding_left_chunks)\n",
"\n",
"#print(chunk_masks)\n",
"data = np.load(\".notebook/enc_embed.npz\")\n",
"torch_pos_emb=data['pos_emb']\n",
"torch_xs = data['embed_out']\n",
"torch_chunk_masks = data['chunk_masks']\n",
"torch_mask_pad = data['mask_pad']\n",
"print(np.allclose(xs.numpy(), torch_xs))\n",
"print(np.allclose(pos_emb.numpy(), torch_pos_emb))\n",
"np.testing.assert_equal(chunk_masks.numpy(), torch_chunk_masks)\n",
"np.testing.assert_equal(mask_pad.numpy(), ~torch_mask_pad)\n",
"\n",
"print(\"--------layers_______\")\n",
"i =0\n",
"for layer in model.encoder.encoders:\n",
" xs, chunk_masks, _ = layer(xs, chunk_masks, pos_emb, mask_pad)\n",
" i+=1\n",
"# if i == 2:\n",
"# data = np.load('.notebook/enc_2.npz')\n",
"# torch_xs = data['enc_2']\n",
"# print(np.allclose(xs.numpy(), torch_xs))\n",
"# print(np.allclose(xs.numpy(), torch_xs, atol=1e-5))\n",
"# print(xs[0].numpy())\n",
"# print('xxxxxx')\n",
"# print(torch_xs[0])\n",
"# print('----i==2')\n",
"data = np.load('.notebook/enc_all.npz')\n",
"torch_xs = data['enc_all']\n",
"print(np.allclose(xs.numpy(), torch_xs))\n",
"print(np.allclose(xs.numpy(), torch_xs, atol=1e-5))\n",
"print(xs[0].numpy())\n",
"print('xxxxxx')\n",
"print(torch_xs[0])"
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "municipal-stock",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 278,
"id": "macro-season",
"metadata": {},
"outputs": [
{
"ename": "SystemError",
"evalue": "(Fatal) Operator elementwise_sub raises an paddle::memory::allocation::BadAlloc exception.\nThe exception content is\n:ResourceExhaustedError: \n\nOut of memory error on GPU 0. Cannot allocate 1.010986MB memory on GPU 0, available memory is only 6.437500MB.\n\nPlease check whether there is any other process using GPU 0.\n1. If yes, please stop them, or start PaddlePaddle on another GPU.\n2. If no, please decrease the batch size of your model. \n\n (at /paddle/paddle/fluid/memory/allocation/cuda_allocator.cc:69)\n. (at /paddle/paddle/fluid/imperative/tracer.cc:172)\n",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mSystemError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-43-fb4fc80a6da8>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mencoder_out\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoder_mask\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencoder\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfeat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeat_len\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mencoder_out\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mencoder_mask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mencoder_out\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch_encoder_out\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/fluid/dygraph/layers.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *inputs, **kwargs)\u001b[0m\n\u001b[1;32m 900\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_built\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 901\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 902\u001b[0;31m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 903\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 904\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mforward_post_hook\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_forward_post_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/workspace/DeepSpeech-2.x/deepspeech/modules/encoder.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, xs, xs_lens, decoding_chunk_size, num_decoding_left_chunks)\u001b[0m\n\u001b[1;32m 158\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mglobal_cmvn\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 160\u001b[0;31m \u001b[0mxs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mglobal_cmvn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mxs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 161\u001b[0m \u001b[0;31m#TODO(Hui Zhang): self.embed(xs, masks, offset=0), stride_slice not support bool tensor\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[0mxs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpos_emb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmasks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0membed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mxs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmasks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtype_as\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mxs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moffset\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/fluid/dygraph/layers.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *inputs, **kwargs)\u001b[0m\n\u001b[1;32m 900\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_built\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 901\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 902\u001b[0;31m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 903\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 904\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mforward_post_hook\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_forward_post_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/workspace/DeepSpeech-2.x/deepspeech/modules/cmvn.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 46\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mpaddle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTensor\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mnormalized\u001b[0m \u001b[0mfeature\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 47\u001b[0m \"\"\"\n\u001b[0;32m---> 48\u001b[0;31m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 49\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnorm_var\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mistd\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/fluid/dygraph/math_op_patch.py\u001b[0m in \u001b[0;36m__impl__\u001b[0;34m(self, other_var)\u001b[0m\n\u001b[1;32m 247\u001b[0m \u001b[0maxis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 248\u001b[0m \u001b[0mmath_op\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mops\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mop_type\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 249\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mmath_op\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother_var\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'axis'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 250\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 251\u001b[0m \u001b[0mcomment\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mOpProtoHolder\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_op_proto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mop_type\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcomment\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mSystemError\u001b[0m: (Fatal) Operator elementwise_sub raises an paddle::memory::allocation::BadAlloc exception.\nThe exception content is\n:ResourceExhaustedError: \n\nOut of memory error on GPU 0. Cannot allocate 1.010986MB memory on GPU 0, available memory is only 6.437500MB.\n\nPlease check whether there is any other process using GPU 0.\n1. If yes, please stop them, or start PaddlePaddle on another GPU.\n2. If no, please decrease the batch size of your model. \n\n (at /paddle/paddle/fluid/memory/allocation/cuda_allocator.cc:69)\n. (at /paddle/paddle/fluid/imperative/tracer.cc:172)\n"
"name": "stdout",
"output_type": "stream",
"text": [
"[[-0.7019424 0.5625421 0.68803453 ... 1.1237317 0.7803923\n",
" 1.1369386 ]\n",
" [-0.7787783 0.39126673 0.71887773 ... 1.251882 0.886168\n",
" 1.3173451 ]\n",
" [-0.95908964 0.6346029 0.87671334 ... 0.98183745 0.7440111\n",
" 1.2903278 ]\n",
" ...\n",
" [-1.0732255 0.67236906 0.92303115 ... 0.9075458 0.8176712\n",
" 1.3239655 ]\n",
" [-1.1654117 0.68199664 0.6939452 ... 1.2238352 0.8028294\n",
" 1.4506506 ]\n",
" [-1.2732091 0.71458054 0.7581958 ... 0.9415482 0.8774844\n",
" 1.2623048 ]]\n",
"---\n",
"[[-0.7019424 0.56254166 0.6880345 ... 1.1237322 0.78039217\n",
" 1.1369387 ]\n",
" [-0.778778 0.39126638 0.7188779 ... 1.2518823 0.8861681\n",
" 1.3173454 ]\n",
" [-0.9590891 0.6346026 0.87671363 ... 0.9818373 0.74401116\n",
" 1.2903274 ]\n",
" ...\n",
" [-1.0732253 0.6723689 0.9230311 ... 0.9075457 0.8176713\n",
" 1.3239657 ]\n",
" [-1.165412 0.6819976 0.69394535 ... 1.2238353 0.80282927\n",
" 1.4506509 ]\n",
" [-1.2732087 0.71458083 0.7581961 ... 0.9415482 0.877484\n",
" 1.2623053 ]]\n",
"False\n",
"True\n",
"False\n"
]
}
],
"source": [
"encoder_out, encoder_mask = model.encoder(feat, feat_len)\n",
"print(encoder_out.shape)\n",
"print(encoder_mask.shape)\n",
"print(encoder_out[0])\n",
"print(torch_encoder_out[0])"
"encoder_out, mask = model.encoder(feat, feat_len)\n",
"print(encoder_out.numpy()[0])\n",
"print(\"---\")\n",
"print(torch_encoder_out[0])\n",
"print(np.allclose(torch_encoder_out, encoder_out.numpy()))\n",
"print(np.allclose(torch_encoder_out, encoder_out.numpy(), atol=1e-5))\n",
"print(np.allclose(torch_encoder_out, encoder_out.numpy(), atol=1e-6))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "sonic-thumb",
"id": "associate-sampling",
"metadata": {},
"outputs": [],
"source": []
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册