{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "pe-overview",
   "metadata": {},
   "source": [
    "# Sinusoidal positional encoding: PyTorch vs. Paddle\n",
    "\n",
    "Builds the same `max_len x d_model` sin/cos table with PyTorch and with Paddle, then reports how far apart the two results are.\n",
    "\n",
    "All values are `float32` and sin/cos pass through zero, so the comparisons use an absolute tolerance in addition to a relative one and print the largest absolute difference. A purely relative check (`atol=0`) rejects tiny differences wherever the expected value is close to zero.\n",
    "\n",
    "**TODO:** after running, confirm that the printed `max |diff|` values are at the float32 rounding level and tighten `ATOL` if they are."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "pe-imports",
   "metadata": {},
   "outputs": [],
   "source": [
    "import math\n",
    "\n",
    "import numpy as np\n",
    "import paddle\n",
    "import torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "pe-config",
   "metadata": {},
   "outputs": [],
   "source": [
    "max_len = 100\n",
    "d_model = 256\n",
    "\n",
    "# Comparison tolerances for float32 results; adjust after inspecting the\n",
    "# max |diff| values printed by the comparison cells.\n",
    "RTOL = 1e-5\n",
    "ATOL = 1e-4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "pe-helper",
   "metadata": {},
   "outputs": [],
   "source": [
    "def report_close(label, actual, expected, rtol=RTOL, atol=ATOL):\n",
    "    \"\"\"Print the largest absolute difference between two arrays and return np.allclose.\n",
    "\n",
    "    Args:\n",
    "        label: Text printed in front of the result.\n",
    "        actual: NumPy array to check.\n",
    "        expected: NumPy reference array with a broadcast-compatible shape.\n",
    "        rtol: Relative tolerance forwarded to np.allclose.\n",
    "        atol: Absolute tolerance forwarded to np.allclose.\n",
    "\n",
    "    Returns:\n",
    "        True when every element satisfies |actual - expected| <= atol + rtol * |expected|.\n",
    "    \"\"\"\n",
    "    actual = np.asarray(actual)\n",
    "    expected = np.asarray(expected)\n",
    "    max_abs_diff = float(np.max(np.abs(actual - expected)))\n",
    "    is_close = bool(np.allclose(actual, expected, rtol=rtol, atol=atol))\n",
    "    print(f'{label}: max |diff| = {max_abs_diff:.3e}, allclose = {is_close}')\n",
    "    return is_close"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "pe-torch-header",
   "metadata": {},
   "source": [
    "## PyTorch table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "designing-borough",
   "metadata": {},
   "outputs": [],
   "source": [
    "torch_position = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)\n",
    "torch_div_term = torch.exp(\n",
    "    torch.arange(0, d_model, 2, dtype=torch.float32)\n",
    "    * -(math.log(10000.0) / d_model)\n",
    ")\n",
    "torch_angles = torch_position * torch_div_term\n",
    "torch_sin = torch.sin(torch_angles)\n",
    "torch_cos = torch.cos(torch_angles)\n",
    "\n",
    "# Even columns hold sin, odd columns hold cos.\n",
    "pe = torch.zeros(max_len, d_model)\n",
    "pe[:, 0::2] = torch_sin\n",
    "pe[:, 1::2] = torch_cos\n",
    "\n",
    "# Show a small corner instead of printing the whole table.\n",
    "pe[:3, :4]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "pe-numpy-header",
   "metadata": {},
   "source": [
    "## NumPy cross-check\n",
    "\n",
    "NumPy sin/cos evaluated on the PyTorch angles, used as a framework-independent reference."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "flying-reserve",
   "metadata": {},
   "outputs": [],
   "source": [
    "np_sin = np.sin(torch_angles.numpy())\n",
    "np_cos = np.cos(torch_angles.numpy())\n",
    "\n",
    "for label, actual, expected in [\n",
    "    ('torch sin vs numpy sin', torch_sin.numpy(), np_sin),\n",
    "    ('torch cos vs numpy cos', torch_cos.numpy(), np_cos),\n",
    "]:\n",
    "    report_close(label, actual, expected)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "pe-paddle-header",
   "metadata": {},
   "source": [
    "## Paddle table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "swiss-referral",
   "metadata": {},
   "outputs": [],
   "source": [
    "paddle_position = paddle.arange(0, max_len, dtype='float32').unsqueeze(1)\n",
    "paddle_div_term = paddle.exp(\n",
    "    paddle.arange(0, d_model, 2, dtype='float32')\n",
    "    * -(math.log(10000.0) / d_model)\n",
    ")\n",
    "paddle_angles = paddle_position * paddle_div_term\n",
    "paddle_sin = paddle.sin(paddle_angles)\n",
    "paddle_cos = paddle.cos(paddle_angles)\n",
    "\n",
    "# Same layout as the PyTorch table: even columns sin, odd columns cos.\n",
    "ppe = paddle.zeros([max_len, d_model])\n",
    "ppe[:, 0::2] = paddle_sin\n",
    "ppe[:, 1::2] = paddle_cos"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "pe-compare-header",
   "metadata": {},
   "source": [
    "## Paddle vs. PyTorch / NumPy\n",
    "\n",
    "Each line prints the largest absolute difference and whether the pair agrees within `RTOL` / `ATOL`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "integrated-boards",
   "metadata": {},
   "outputs": [],
   "source": [
    "comparisons = [\n",
    "    ('position', paddle_position.numpy(), torch_position.numpy()),\n",
    "    ('div_term', paddle_div_term.numpy(), torch_div_term.numpy()),\n",
    "    ('paddle sin vs numpy sin', paddle_sin.numpy(), np_sin),\n",
    "    ('paddle cos vs numpy cos', paddle_cos.numpy(), np_cos),\n",
    "    ('paddle sin vs torch sin', paddle_sin.numpy(), torch_sin.numpy()),\n",
    "    ('paddle cos vs torch cos', paddle_cos.numpy(), torch_cos.numpy()),\n",
    "    ('full table (ppe vs pe)', ppe.numpy(), pe.numpy()),\n",
    "]\n",
    "for label, actual, expected in comparisons:\n",
    "    report_close(label, actual, expected)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}