position_embeding_check.ipynb 4.3 KB
Notebook
Newer Older
H
Hui Zhang 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "designing-borough",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 0.0000000e+00  0.0000000e+00  0.0000000e+00 ...  0.0000000e+00\n",
      "   0.0000000e+00  0.0000000e+00]\n",
      " [ 8.4147096e-01  8.0196178e-01  7.6172036e-01 ...  1.2409373e-04\n",
      "   1.1547816e-04  1.0746076e-04]\n",
      " [ 9.0929741e-01  9.5814437e-01  9.8704624e-01 ...  2.4818745e-04\n",
      "   2.3095631e-04  2.1492151e-04]\n",
      " ...\n",
      " [ 3.7960774e-01  7.4510968e-01  7.3418564e-01 ...  1.2036801e-02\n",
      "   1.1201146e-02  1.0423505e-02]\n",
      " [-5.7338190e-01 -8.9752287e-02 -4.1488394e-02 ...  1.2160885e-02\n",
      "   1.1316618e-02  1.0530960e-02]\n",
      " [-9.9920684e-01 -8.5234123e-01 -7.8794664e-01 ...  1.2284970e-02\n",
      "   1.1432089e-02  1.0638415e-02]]\n",
      "True\n",
      "True\n"
     ]
    }
   ],
   "source": [
    "import math\n",
    "\n",
    "import numpy as np\n",
    "import torch\n",
    "\n",
    "\n",
    "def _as_numpy(tensor):\n",
    "    # Move to host memory, drop autograd tracking, and expose as a NumPy array.\n",
    "    return tensor.cpu().detach().numpy()\n",
    "\n",
    "\n",
    "# Size of the table: max_len rows (positions) by d_model columns.\n",
    "max_len = 100\n",
    "d_model = 256\n",
    "\n",
    "# Positions 0..max_len-1 as a column vector of shape (max_len, 1).\n",
    "position = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)\n",
    "toruch_position = position\n",
    "\n",
    "# One frequency per even column index: exp(-i * ln(10000) / d_model).\n",
    "div_term = torch.exp(\n",
    "    torch.arange(0, d_model, 2, dtype=torch.float32) *\n",
    "    -(math.log(10000.0) / d_model))\n",
    "tourch_div_term = _as_numpy(div_term)\n",
    "\n",
    "# Shared sin/cos argument; broadcasting gives shape (max_len, d_model // 2).\n",
    "angles = position * div_term\n",
    "torhc_sin = torch.sin(angles)\n",
    "torhc_cos = torch.cos(angles)\n",
    "print(_as_numpy(torhc_sin))\n",
    "\n",
    "# NumPy reference values computed from the same float32 angles.\n",
    "angles_np = _as_numpy(angles)\n",
    "np_sin = np.sin(angles_np)\n",
    "np_cos = np.cos(angles_np)\n",
    "print(np.allclose(np_sin, _as_numpy(torhc_sin)))\n",
    "print(np.allclose(np_cos, _as_numpy(torhc_cos)))\n",
    "\n",
    "# Interleave the halves: even columns hold sin, odd columns hold cos.\n",
    "pe = torch.zeros(max_len, d_model)\n",
    "pe[:, 0::2] = torhc_sin\n",
    "pe[:, 1::2] = torhc_cos\n",
    "tourch_pe = _as_numpy(pe)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "swiss-referral",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n",
      "True\n",
      "False\n",
      "False\n",
      "False\n",
      "False\n"
     ]
    }
   ],
   "source": [
    "import paddle\n",
    "\n",
    "# Rebuild the inputs with Paddle and compare each one to its Torch counterpart.\n",
    "position = paddle.arange(0, max_len, dtype='float32').unsqueeze(1)\n",
    "print(np.allclose(position.numpy(), toruch_position))\n",
    "\n",
    "div_term = paddle.exp(\n",
    "    paddle.arange(0, d_model, 2, dtype='float32') *\n",
    "    -(math.log(10000.0) / d_model))\n",
    "print(np.allclose(div_term.numpy(), tourch_div_term))\n",
    "\n",
    "# Paddle sin/cos of the position-by-frequency products.\n",
    "p_angles = position * div_term\n",
    "p_sin = paddle.sin(p_angles)\n",
    "p_cos = paddle.cos(p_angles)\n",
    "\n",
    "# Check against the NumPy reference using a relative tolerance only (atol=0).\n",
    "for np_ref, p_val in ((np_sin, p_sin), (np_cos, p_cos)):\n",
    "    print(np.allclose(np_ref, p_val.numpy(), rtol=1.e-6, atol=0))\n",
    "\n",
    "# Interleave the halves: even columns hold sin, odd columns hold cos.\n",
    "ppe = paddle.zeros([max_len, d_model])\n",
    "ppe[:, 0::2] = p_sin\n",
    "ppe[:, 1::2] = p_cos\n",
    "\n",
    "# Check against the Torch results using np.allclose's default tolerances.\n",
    "for p_val, torch_val in ((p_sin, torhc_sin), (p_cos, torhc_cos)):\n",
    "    print(np.allclose(p_val.numpy(), torch_val.cpu().detach().numpy()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "integrated-boards",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "False\n"
     ]
    }
   ],
   "source": [
    "# Compare the complete interleaved tables produced by Paddle and Torch.\n",
    "paddle_table = ppe.numpy()\n",
    "torch_table = pe.numpy()\n",
    "print(np.allclose(paddle_table, torch_table))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "flying-reserve",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "revised-divide",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}