From 43c147dad13e9304c9fa957a83660c55f45ee0c3 Mon Sep 17 00:00:00 2001 From: softpo <455098435@qq.com> Date: Thu, 16 May 2019 11:59:06 +0800 Subject: [PATCH] =?UTF-8?q?'pandas=E4=BD=BF=E7=94=A8'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...as\347\232\204\345\272\224\347\224\250.md" | 23 + .../1-pandas\345\205\245\351\227\250.ipynb" | 631 ++ .../2-pandas-\347\264\242\345\274\225.ipynb" | 2082 ++++++ ...347\251\272\346\225\260\346\215\256.ipynb" | 6285 +++++++++++++++++ ...345\261\202\347\264\242\345\274\225.ipynb" | 568 ++ ...345\274\225\350\256\241\347\256\227.ipynb" | 1000 +++ ...346\215\256\351\233\206\346\210\220.ipynb" | 1209 ++++ ...15\256\351\233\206\346\210\220merge.ipynb" | 1272 ++++ ...345\220\210\346\223\215\344\275\234.ipynb" | 877 +++ ...346\210\220\345\256\236\346\210\230.ipynb" | 6213 ++++++++++++++++ Day76-90/code/cancer_predict.npy | Bin 0 -> 450 bytes Day76-90/code/cancer_true.npy | Bin 0 -> 450 bytes Day76-90/code/state-abbrevs.csv | 52 + Day76-90/code/state-areas.csv | 53 + Day76-90/code/state-population.csv | 2545 +++++++ 15 files changed, 22810 insertions(+) create mode 100644 "Day76-90/code/1-pandas\345\205\245\351\227\250.ipynb" create mode 100644 "Day76-90/code/2-pandas-\347\264\242\345\274\225.ipynb" create mode 100644 "Day76-90/code/3-pandas\346\225\260\346\215\256\346\270\205\346\264\227\344\271\213\347\251\272\346\225\260\346\215\256.ipynb" create mode 100644 "Day76-90/code/4-pandas\345\244\232\345\261\202\347\264\242\345\274\225.ipynb" create mode 100644 "Day76-90/code/5-pandas\345\244\232\345\261\202\347\264\242\345\274\225\350\256\241\347\256\227.ipynb" create mode 100644 "Day76-90/code/6-pandas\346\225\260\346\215\256\351\233\206\346\210\220.ipynb" create mode 100644 "Day76-90/code/7-pandas\346\225\260\346\215\256\351\233\206\346\210\220merge.ipynb" create mode 100644 "Day76-90/code/8-pandas\345\210\206\347\273\204\350\201\232\345\220\210\346\223\215\344\275\234.ipynb" create mode 100644 "Day76-90/code/9-pandas\346\225\260\346\215\256\351\233\206\346\210\220\345\256\236\346\210\230.ipynb" create mode 100644 Day76-90/code/cancer_predict.npy create mode 100644 Day76-90/code/cancer_true.npy create mode 100644 Day76-90/code/state-abbrevs.csv create mode 100644 Day76-90/code/state-areas.csv create mode 100644 Day76-90/code/state-population.csv diff --git "a/Day76-90/02.Pandas\347\232\204\345\272\224\347\224\250.md" "b/Day76-90/02.Pandas\347\232\204\345\272\224\347\224\250.md" index 9a101cf..3115ad5 100644 --- "a/Day76-90/02.Pandas\347\232\204\345\272\224\347\224\250.md" +++ "b/Day76-90/02.Pandas\347\232\204\345\272\224\347\224\250.md" @@ -1,2 +1,25 @@ ## Pandas的应用 +### 1、pandas入门 + +### 2、pandas索引 + +### 3、pandas数据清洗之空数据 + +[数据挖掘之空数据处理(有史以来最全面)]: https://blog.csdn.net/Soft_Po/article/details/89302887 + +### 4、pandas多层索引 + +### 5、pandas多层索引计算 + +### 6、pandas数据集成concat + +### 7、pandas数据集成merge + +### 8、pandas分组聚合操作 + +### 9、pandas数据集成实战 + +### 10、美国大选项目 + +[2012美国大选政治献金项目数据分析(有史以来最全面)]: https://blog.csdn.net/Soft_Po/article/details/89283382 \ No newline at end of file diff --git "a/Day76-90/code/1-pandas\345\205\245\351\227\250.ipynb" "b/Day76-90/code/1-pandas\345\205\245\351\227\250.ipynb" new file mode 100644 index 0000000..2f25670 --- /dev/null +++ "b/Day76-90/code/1-pandas\345\205\245\351\227\250.ipynb" @@ -0,0 +1,631 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from pandas import Series,DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Math 120\n", + "Python 136\n", + "En 128\n", + "Chinese 99\n", + "dtype: int64" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 创建\n", + "# Series是一维的数据\n", + "s = Series(data = [120,136,128,99],index = ['Math','Python','En','Chinese'])\n", + "s" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(4,)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([120, 136, 128, 99], dtype=int64)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "v = s.values\n", + "v" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "numpy.ndarray" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(v)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "120.75" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "136" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.max()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "15.903353943953666" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.std()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Math 14400\n", + "Python 18496\n", + "En 16384\n", + "Chinese 9801\n", + "dtype: int64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.pow(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMath
a11311675
b1914523
c57107113
d95366
e28121120
f14185132
h1243910
i803517
j689931
k741211
\n", + "
" + ], + "text/plain": [ + " Python En Math\n", + "a 113 116 75\n", + "b 19 145 23\n", + "c 57 107 113\n", + "d 95 3 66\n", + "e 28 121 120\n", + "f 141 85 132\n", + "h 124 39 10\n", + "i 80 35 17\n", + "j 68 99 31\n", + "k 74 12 11" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# DataFrame是二维的数据\n", + "# excel就非诚相似\n", + "# 所有进行数据分析,数据挖掘的工具最基础的结果:行和列,行表示样本,列表示的是属性\n", + "df = DataFrame(data = np.random.randint(0,150,size = (10,3)),index = list('abcdefhijk'),columns=['Python','En','Math'])\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(10, 3)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[113, 116, 75],\n", + " [ 19, 145, 23],\n", + " [ 57, 107, 113],\n", + " [ 95, 3, 66],\n", + " [ 28, 121, 120],\n", + " [141, 85, 132],\n", + " [124, 39, 10],\n", + " [ 80, 35, 17],\n", + " [ 68, 99, 31],\n", + " [ 74, 12, 11]])" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "v = df.values\n", + "v" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Python 79.9\n", + "En 76.2\n", + "Math 59.8\n", + "dtype: float64" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Python 141\n", + "En 145\n", + "Math 132\n", + "dtype: int32" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.max()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMath
a11311675
b1914523
c57107113
d95366
e28121120
f14185132
h1243910
i803517
j689931
k741211
\n", + "
" + ], + "text/plain": [ + " Python En Math\n", + "a 113 116 75\n", + "b 19 145 23\n", + "c 57 107 113\n", + "d 95 3 66\n", + "e 28 121 120\n", + "f 141 85 132\n", + "h 124 39 10\n", + "i 80 35 17\n", + "j 68 99 31\n", + "k 74 12 11" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Python 79.9\n", + "En 76.2\n", + "Math 59.8\n", + "dtype: float64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.mean(axis = 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "a 101.333333\n", + "b 62.333333\n", + "c 92.333333\n", + "d 54.666667\n", + "e 89.666667\n", + "f 119.333333\n", + "h 57.666667\n", + "i 44.000000\n", + "j 66.000000\n", + "k 32.333333\n", + "dtype: float64" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.mean(axis = 1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git "a/Day76-90/code/2-pandas-\347\264\242\345\274\225.ipynb" "b/Day76-90/code/2-pandas-\347\264\242\345\274\225.ipynb" new file mode 100644 index 0000000..ddbde0a --- /dev/null +++ "b/Day76-90/code/2-pandas-\347\264\242\345\274\225.ipynb" @@ -0,0 +1,2082 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "import pandas as pd\n", + "\n", + "from pandas import Series,DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "10 34\n", + "11 111\n", + "12 113\n", + "13 103\n", + "14 147\n", + "15 63\n", + "16 11\n", + "17 130\n", + "18 38\n", + "19 17\n", + "20 32\n", + "21 112\n", + "22 75\n", + "23 68\n", + "24 124\n", + "25 138\n", + "26 56\n", + "27 1\n", + "28 88\n", + "29 113\n", + "30 63\n", + "31 42\n", + "32 65\n", + "33 104\n", + "34 105\n", + "35 0\n", + "36 95\n", + "37 119\n", + "38 86\n", + "39 124\n", + " ... \n", + "80 127\n", + "81 139\n", + "82 110\n", + "83 65\n", + "84 127\n", + "85 108\n", + "86 33\n", + "87 91\n", + "88 134\n", + "89 65\n", + "90 110\n", + "91 144\n", + "92 40\n", + "93 3\n", + "94 3\n", + "95 59\n", + "96 97\n", + "97 64\n", + "98 126\n", + "99 94\n", + "100 20\n", + "101 107\n", + "102 59\n", + "103 146\n", + "104 83\n", + "105 59\n", + "106 25\n", + "107 0\n", + "108 78\n", + "109 93\n", + "Name: Python, Length: 100, dtype: int16" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s = Series(np.random.randint(0,150,size = 100),index = np.arange(10,110),dtype=np.int16,name = 'Python')\n", + "s" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "ename": "KeyError", + "evalue": "0", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# 索引操作\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0ms\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32md:\\python36\\lib\\site-packages\\pandas\\core\\series.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 866\u001b[0m \u001b[0mkey\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply_if_callable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 867\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 868\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_value\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 869\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 870\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32md:\\python36\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36mget_value\u001b[1;34m(self, series, key)\u001b[0m\n\u001b[0;32m 4373\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4374\u001b[0m return self._engine.get_value(s, k,\n\u001b[1;32m-> 4375\u001b[1;33m tz=getattr(series.dtype, 'tz', None))\n\u001b[0m\u001b[0;32m 4376\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4377\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m0\u001b[0m \u001b[1;32mand\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mholds_integer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mis_boolean\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[1;34m()\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[1;34m()\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[1;34m()\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[1;34m()\u001b[0m\n", + "\u001b[1;31mKeyError\u001b[0m: 0" + ] + } + ], + "source": [ + "# 索引操作\n", + "s[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "34" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s[10]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "10 34\n", + "20 32\n", + "Name: Python, dtype: int16" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s[[10,20]]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "20 32\n", + "21 112\n", + "22 75\n", + "23 68\n", + "24 124\n", + "25 138\n", + "26 56\n", + "27 1\n", + "28 88\n", + "29 113\n", + "Name: Python, dtype: int16" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 切片操作\n", + "s[10:20]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "10 34\n", + "12 113\n", + "14 147\n", + "16 11\n", + "18 38\n", + "20 32\n", + "22 75\n", + "24 124\n", + "26 56\n", + "28 88\n", + "30 63\n", + "32 65\n", + "34 105\n", + "36 95\n", + "38 86\n", + "40 6\n", + "42 57\n", + "44 72\n", + "46 43\n", + "48 87\n", + "50 83\n", + "52 99\n", + "54 132\n", + "56 17\n", + "58 116\n", + "60 33\n", + "62 51\n", + "64 80\n", + "66 121\n", + "68 81\n", + "70 0\n", + "72 50\n", + "74 31\n", + "76 114\n", + "78 60\n", + "80 127\n", + "82 110\n", + "84 127\n", + "86 33\n", + "88 134\n", + "90 110\n", + "92 40\n", + "94 3\n", + "96 97\n", + "98 126\n", + "100 20\n", + "102 59\n", + "104 83\n", + "106 25\n", + "108 78\n", + "Name: Python, dtype: int16" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s[::2]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "109 93\n", + "107 0\n", + "105 59\n", + "103 146\n", + "101 107\n", + "99 94\n", + "97 64\n", + "95 59\n", + "93 3\n", + "91 144\n", + "89 65\n", + "87 91\n", + "85 108\n", + "83 65\n", + "81 139\n", + "79 14\n", + "77 96\n", + "75 76\n", + "73 29\n", + "71 68\n", + "69 4\n", + "67 57\n", + "65 58\n", + "63 106\n", + "61 42\n", + "59 135\n", + "57 56\n", + "55 12\n", + "53 135\n", + "51 74\n", + "49 129\n", + "47 110\n", + "45 1\n", + "43 90\n", + "41 120\n", + "39 124\n", + "37 119\n", + "35 0\n", + "33 104\n", + "31 42\n", + "29 113\n", + "27 1\n", + "25 138\n", + "23 68\n", + "21 112\n", + "19 17\n", + "17 130\n", + "15 63\n", + "13 103\n", + "11 111\n", + "Name: Python, dtype: int16" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s[::-2]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "34" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 可以使用pandas为开发者提供方法,去进行检索\n", + "s.loc[10]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "10 34\n", + "20 32\n", + "Name: Python, dtype: int16" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.loc[[10,20]]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "10 34\n", + "11 111\n", + "12 113\n", + "13 103\n", + "14 147\n", + "15 63\n", + "16 11\n", + "17 130\n", + "18 38\n", + "19 17\n", + "20 32\n", + "Name: Python, dtype: int16" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.loc[10:20]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "10 34\n", + "12 113\n", + "14 147\n", + "16 11\n", + "18 38\n", + "20 32\n", + "22 75\n", + "24 124\n", + "26 56\n", + "28 88\n", + "30 63\n", + "32 65\n", + "34 105\n", + "36 95\n", + "38 86\n", + "40 6\n", + "42 57\n", + "44 72\n", + "46 43\n", + "48 87\n", + "50 83\n", + "52 99\n", + "54 132\n", + "56 17\n", + "58 116\n", + "60 33\n", + "62 51\n", + "64 80\n", + "66 121\n", + "68 81\n", + "70 0\n", + "72 50\n", + "74 31\n", + "76 114\n", + "78 60\n", + "80 127\n", + "82 110\n", + "84 127\n", + "86 33\n", + "88 134\n", + "90 110\n", + "92 40\n", + "94 3\n", + "96 97\n", + "98 126\n", + "100 20\n", + "102 59\n", + "104 83\n", + "106 25\n", + "108 78\n", + "Name: Python, dtype: int16" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.loc[::2]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "109 93\n", + "107 0\n", + "105 59\n", + "103 146\n", + "101 107\n", + "99 94\n", + "97 64\n", + "95 59\n", + "93 3\n", + "91 144\n", + "89 65\n", + "87 91\n", + "85 108\n", + "83 65\n", + "81 139\n", + "79 14\n", + "77 96\n", + "75 76\n", + "73 29\n", + "71 68\n", + "69 4\n", + "67 57\n", + "65 58\n", + "63 106\n", + "61 42\n", + "59 135\n", + "57 56\n", + "55 12\n", + "53 135\n", + "51 74\n", + "49 129\n", + "47 110\n", + "45 1\n", + "43 90\n", + "41 120\n", + "39 124\n", + "37 119\n", + "35 0\n", + "33 104\n", + "31 42\n", + "29 113\n", + "27 1\n", + "25 138\n", + "23 68\n", + "21 112\n", + "19 17\n", + "17 130\n", + "15 63\n", + "13 103\n", + "11 111\n", + "Name: Python, dtype: int16" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.loc[::-2]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Int64Index([ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,\n", + " 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,\n", + " 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,\n", + " 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,\n", + " 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,\n", + " 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,\n", + " 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,\n", + " 101, 102, 103, 104, 105, 106, 107, 108, 109],\n", + " dtype='int64')" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.index" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "34" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# iloc 索引从0开始,数字化自然索引\n", + "s.iloc[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "10 34\n", + "20 32\n", + "Name: Python, dtype: int16" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.iloc[[0,10]]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "10 34\n", + "11 111\n", + "12 113\n", + "13 103\n", + "14 147\n", + "15 63\n", + "16 11\n", + "17 130\n", + "18 38\n", + "19 17\n", + "20 32\n", + "21 112\n", + "22 75\n", + "23 68\n", + "24 124\n", + "25 138\n", + "26 56\n", + "27 1\n", + "28 88\n", + "29 113\n", + "Name: Python, dtype: int16" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.iloc[0:20]" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "109 93\n", + "107 0\n", + "105 59\n", + "103 146\n", + "101 107\n", + "99 94\n", + "97 64\n", + "95 59\n", + "93 3\n", + "91 144\n", + "89 65\n", + "87 91\n", + "85 108\n", + "83 65\n", + "81 139\n", + "79 14\n", + "77 96\n", + "75 76\n", + "73 29\n", + "71 68\n", + "69 4\n", + "67 57\n", + "65 58\n", + "63 106\n", + "61 42\n", + "59 135\n", + "57 56\n", + "55 12\n", + "53 135\n", + "51 74\n", + "49 129\n", + "47 110\n", + "45 1\n", + "43 90\n", + "41 120\n", + "39 124\n", + "37 119\n", + "35 0\n", + "33 104\n", + "31 42\n", + "29 113\n", + "27 1\n", + "25 138\n", + "23 68\n", + "21 112\n", + "19 17\n", + "17 130\n", + "15 63\n", + "13 103\n", + "11 111\n", + "Name: Python, dtype: int16" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.iloc[::-2]" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMath
A1035698
B13565135
C1379146
D4724145
E899716
F6426109
H4846111
I164997
J122126100
K6013662
\n", + "
" + ], + "text/plain": [ + " Python En Math\n", + "A 103 56 98\n", + "B 135 65 135\n", + "C 13 79 146\n", + "D 47 24 145\n", + "E 89 97 16\n", + "F 64 26 109\n", + "H 48 46 111\n", + "I 16 49 97\n", + "J 122 126 100\n", + "K 60 136 62" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# DataFrame是二维,索引大同小异,\n", + "df = DataFrame(data = np.random.randint(0,150,size= (10,3)),index=list('ABCDEFHIJK'),columns=['Python','En','Math'])\n", + "\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'A'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32md:\\python36\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36mget_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m 2656\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2657\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2658\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n", + "\u001b[1;31mKeyError\u001b[0m: 'A'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'A'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32md:\\python36\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 2925\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnlevels\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2926\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2927\u001b[1;33m \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2928\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2929\u001b[0m \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32md:\\python36\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36mget_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m 2657\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2658\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2659\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_maybe_cast_indexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2660\u001b[0m \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_indexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtolerance\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtolerance\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2661\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m1\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msize\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n", + "\u001b[1;31mKeyError\u001b[0m: 'A'" + ] + } + ], + "source": [ + "df['A']" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "A 103\n", + "B 135\n", + "C 13\n", + "D 47\n", + "E 89\n", + "F 64\n", + "H 48\n", + "I 16\n", + "J 122\n", + "K 60\n", + "Name: Python, dtype: int32" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Python']" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEn
A10356
B13565
C1379
D4724
E8997
F6426
H4846
I1649
J122126
K60136
\n", + "
" + ], + "text/plain": [ + " Python En\n", + "A 103 56\n", + "B 135 65\n", + "C 13 79\n", + "D 47 24\n", + "E 89 97\n", + "F 64 26\n", + "H 48 46\n", + "I 16 49\n", + "J 122 126\n", + "K 60 136" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[['Python','En']]" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMath
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [Python, En, Math]\n", + "Index: []" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Python':'Math']" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMath
A1035698
B13565135
C1379146
D4724145
\n", + "
" + ], + "text/plain": [ + " Python En Math\n", + "A 103 56 98\n", + "B 135 65 135\n", + "C 13 79 146\n", + "D 47 24 145" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['A':'D']" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'Python'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32md:\\python36\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36mget_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m 2656\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2657\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2658\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n", + "\u001b[1;31mKeyError\u001b[0m: 'Python'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'Python'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32md:\\python36\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 1498\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1499\u001b[0m \u001b[0mmaybe_callable\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply_if_callable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1500\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getitem_axis\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmaybe_callable\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1501\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1502\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_is_scalar_access\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32md:\\python36\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_getitem_axis\u001b[1;34m(self, key, axis)\u001b[0m\n\u001b[0;32m 1911\u001b[0m \u001b[1;31m# fall thru to straight lookup\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1912\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_validate_key\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1913\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_get_label\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1914\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1915\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32md:\\python36\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_get_label\u001b[1;34m(self, label, axis)\u001b[0m\n\u001b[0;32m 139\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mIndexingError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'no slices here, handle elsewhere'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 140\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 141\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_xs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlabel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 142\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 143\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_get_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32md:\\python36\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36mxs\u001b[1;34m(self, key, axis, level, drop_level)\u001b[0m\n\u001b[0;32m 3583\u001b[0m drop_level=drop_level)\n\u001b[0;32m 3584\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3585\u001b[1;33m \u001b[0mloc\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3586\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3587\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32md:\\python36\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36mget_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m 2657\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2658\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2659\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_maybe_cast_indexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2660\u001b[0m \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_indexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtolerance\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtolerance\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2661\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m1\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msize\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n", + "\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n", + "\u001b[1;31mKeyError\u001b[0m: 'Python'" + ] + } + ], + "source": [ + "df.loc['Python']" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Python 103\n", + "En 56\n", + "Math 98\n", + "Name: A, dtype: int32" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc['A']" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMath
A1035698
H4846111
\n", + "
" + ], + "text/plain": [ + " Python En Math\n", + "A 103 56 98\n", + "H 48 46 111" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc[['A','H']]" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMath
A1035698
B13565135
C1379146
D4724145
E899716
\n", + "
" + ], + "text/plain": [ + " Python En Math\n", + "A 103 56 98\n", + "B 135 65 135\n", + "C 13 79 146\n", + "D 47 24 145\n", + "E 89 97 16" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc['A':'E']" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMath
A1035698
C1379146
E899716
H4846111
J122126100
\n", + "
" + ], + "text/plain": [ + " Python En Math\n", + "A 103 56 98\n", + "C 13 79 146\n", + "E 89 97 16\n", + "H 48 46 111\n", + "J 122 126 100" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc[::2]" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMath
K6013662
I164997
F6426109
D4724145
B13565135
\n", + "
" + ], + "text/plain": [ + " Python En Math\n", + "K 60 136 62\n", + "I 16 49 97\n", + "F 64 26 109\n", + "D 47 24 145\n", + "B 135 65 135" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc[::-2]" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "ename": "TypeError", + "evalue": "Cannot index by location index with a non-integer key", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'A'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32md:\\python36\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 1498\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1499\u001b[0m \u001b[0mmaybe_callable\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply_if_callable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1500\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getitem_axis\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmaybe_callable\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1501\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1502\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_is_scalar_access\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32md:\\python36\\lib\\site-packages\\pandas\\core\\indexing.py\u001b[0m in \u001b[0;36m_getitem_axis\u001b[1;34m(self, key, axis)\u001b[0m\n\u001b[0;32m 2224\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2225\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2226\u001b[1;33m raise TypeError(\"Cannot index by location index with a \"\n\u001b[0m\u001b[0;32m 2227\u001b[0m \"non-integer key\")\n\u001b[0;32m 2228\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mTypeError\u001b[0m: Cannot index by location index with a non-integer key" + ] + } + ], + "source": [ + "df.iloc['A']" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Python 103\n", + "En 56\n", + "Math 98\n", + "Name: A, dtype: int32" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMath
A1035698
F6426109
\n", + "
" + ], + "text/plain": [ + " Python En Math\n", + "A 103 56 98\n", + "F 64 26 109" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[[0,5]]" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMath
A1035698
B13565135
C1379146
D4724145
E899716
\n", + "
" + ], + "text/plain": [ + " Python En Math\n", + "A 103 56 98\n", + "B 135 65 135\n", + "C 13 79 146\n", + "D 47 24 145\n", + "E 89 97 16" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[0:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMath
K6013662
I164997
F6426109
D4724145
B13565135
\n", + "
" + ], + "text/plain": [ + " Python En Math\n", + "K 60 136 62\n", + "I 16 49 97\n", + "F 64 26 109\n", + "D 47 24 145\n", + "B 135 65 135" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[::-2]" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMath
A1035698
B13565135
C1379146
D4724145
E899716
F6426109
H4846111
I164997
J122126100
K6013662
\n", + "
" + ], + "text/plain": [ + " Python En Math\n", + "A 103 56 98\n", + "B 135 65 135\n", + "C 13 79 146\n", + "D 47 24 145\n", + "E 89 97 16\n", + "F 64 26 109\n", + "H 48 46 111\n", + "I 16 49 97\n", + "J 122 126 100\n", + "K 60 136 62" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EnMath
A5698
C79146
E9716
H46111
J126100
\n", + "
" + ], + "text/plain": [ + " En Math\n", + "A 56 98\n", + "C 79 146\n", + "E 97 16\n", + "H 46 111\n", + "J 126 100" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[::2,1:]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git "a/Day76-90/code/3-pandas\346\225\260\346\215\256\346\270\205\346\264\227\344\271\213\347\251\272\346\225\260\346\215\256.ipynb" "b/Day76-90/code/3-pandas\346\225\260\346\215\256\346\270\205\346\264\227\344\271\213\347\251\272\346\225\260\346\215\256.ipynb" new file mode 100644 index 0000000..dde9ec6 --- /dev/null +++ "b/Day76-90/code/3-pandas\346\225\260\346\215\256\346\270\205\346\264\227\344\271\213\347\251\272\346\225\260\346\215\256.ipynb" @@ -0,0 +1,6285 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "import pandas as pd\n", + "\n", + "from pandas import Series,DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMathPhysicChem
1001221052857
101741291611426
102971211222965
103141731201471
1041261328611617
1058534212166
10614265112483
10713614112286113
1081537124110102
1096330446958
110593811310916
1115518758126
1125397763745
1134214810797143
114701386968134
115471361132294
1163113762028
117148741344124
1181028113812832
11927111137022
1202893121684
12113643259719
12211170123858
12396103147868
12410104663149
1257759710831
12688614511655
12733741065046
12874282610076
1297618101126133
..................
170144124779282
1713698484380
17251143683474
17314911718141120
1748139146112122
17511510164629
17610714045148
17765436810918
1783110011049123
1792946695790
18014686182246
18171504090140
1824100147116110
1835587937834
18451091248782
185101181395051
18632127136124
187941613813149
1886510112312886
18943941029132
190681359428125
1913060982715
1928916101354
193104139972917
194529419991
195191021354140
19658100708264
19784971297613
19813115744114
199793795128116
\n", + "

100 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " Python En Math Physic Chem\n", + "100 122 10 5 28 57\n", + "101 74 129 16 114 26\n", + "102 97 121 122 29 65\n", + "103 141 73 120 147 1\n", + "104 126 132 86 116 17\n", + "105 85 3 42 121 66\n", + "106 142 65 1 124 83\n", + "107 136 141 122 86 113\n", + "108 15 37 124 110 102\n", + "109 63 30 44 69 58\n", + "110 59 38 113 109 16\n", + "111 5 51 87 58 126\n", + "112 53 97 76 37 45\n", + "113 42 148 107 97 143\n", + "114 70 138 69 68 134\n", + "115 47 136 113 22 94\n", + "116 31 137 6 20 28\n", + "117 148 74 134 4 124\n", + "118 102 81 138 128 32\n", + "119 27 111 13 70 22\n", + "120 28 93 121 68 4\n", + "121 136 43 25 97 19\n", + "122 111 70 12 38 58\n", + "123 96 103 147 86 8\n", + "124 10 10 46 63 149\n", + "125 7 75 97 108 31\n", + "126 88 6 145 116 55\n", + "127 33 74 106 50 46\n", + "128 74 28 26 100 76\n", + "129 76 18 101 126 133\n", + ".. ... ... ... ... ...\n", + "170 144 124 77 92 82\n", + "171 36 98 48 43 80\n", + "172 51 143 68 34 74\n", + "173 149 117 18 141 120\n", + "174 8 139 146 112 122\n", + "175 115 101 64 62 9\n", + "176 10 7 140 45 148\n", + "177 65 43 68 109 18\n", + "178 31 100 110 49 123\n", + "179 29 46 69 57 90\n", + "180 146 86 18 22 46\n", + "181 71 50 40 90 140\n", + "182 4 100 147 116 110\n", + "183 55 87 93 78 34\n", + "184 5 109 124 87 82\n", + "185 10 118 139 50 51\n", + "186 32 12 71 36 124\n", + "187 94 16 138 13 149\n", + "188 65 101 123 128 86\n", + "189 43 94 10 29 132\n", + "190 68 135 94 28 125\n", + "191 30 60 98 27 15\n", + "192 89 16 10 135 4\n", + "193 104 139 97 29 17\n", + "194 5 29 41 99 91\n", + "195 19 102 135 41 40\n", + "196 58 100 70 82 64\n", + "197 84 97 129 76 13\n", + "198 131 15 7 44 114\n", + "199 79 37 95 128 116\n", + "\n", + "[100 rows x 5 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = DataFrame(np.random.randint(0,150,size = (100,5)),index = np.arange(100,200),columns=['Python','En','Math','Physic','Chem'])\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Python False\n", + "En False\n", + "Math False\n", + "Physic False\n", + "Chem False\n", + "dtype: bool" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 判断DataFrame是否存在空数据\n", + "df.isnull().any()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Python True\n", + "En True\n", + "Math True\n", + "Physic True\n", + "Chem True\n", + "dtype: bool" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.notnull().all()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "500" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "100*5" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "for i in range(50):\n", + " # 行索引\n", + " index = np.random.randint(100,200,size =1)[0]\n", + "\n", + " cols = df.columns\n", + "\n", + " # 列索引\n", + " col = np.random.choice(cols)\n", + "\n", + " df.loc[index,col] = None" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "for i in range(20):\n", + " # 行索引\n", + " index = np.random.randint(100,200,size =1)[0]\n", + "\n", + " cols = df.columns\n", + "\n", + " # 列索引\n", + " col = np.random.choice(cols)\n", + "\n", + "# not a number 不是一个数\n", + " df.loc[index,col] = np.NAN" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMathPhysicChem
100122.010.05.028.057.0
101NaN129.016.0114.026.0
10297.0121.0122.029.065.0
103141.073.0120.0147.01.0
104126.0NaN86.0116.017.0
10585.0NaN42.0121.066.0
106142.065.01.0124.083.0
107136.0141.0NaN86.0113.0
10815.037.0124.0110.0102.0
10963.030.0NaN69.058.0
110NaNNaN113.0109.016.0
1115.051.087.058.0126.0
11253.097.076.037.045.0
11342.0148.0NaN97.0NaN
11470.0138.069.068.0134.0
115NaN136.0113.022.094.0
11631.0137.06.020.028.0
117148.074.0134.04.0124.0
118102.081.0138.0128.032.0
11927.0111.013.0NaN22.0
12028.093.0121.0NaN4.0
121136.0NaN25.097.019.0
122111.070.012.038.058.0
123NaN103.0147.086.08.0
12410.010.046.063.0149.0
1257.075.097.0108.031.0
12688.06.0NaNNaN55.0
12733.074.0106.050.046.0
12874.028.026.0100.076.0
12976.018.0101.0NaNNaN
..................
170144.0124.077.092.082.0
17136.098.0NaN43.080.0
17251.0NaN68.034.074.0
173149.0NaN18.0141.0NaN
1748.0139.0146.0112.0NaN
175115.0NaN64.062.09.0
176NaN7.0140.045.0148.0
177NaN43.068.0109.018.0
17831.0100.0NaN49.0123.0
17929.046.069.057.090.0
180146.086.018.022.046.0
18171.050.040.0NaN140.0
1824.0100.0147.0116.0110.0
18355.087.093.0NaN34.0
184NaN109.0124.087.082.0
18510.0118.0139.050.051.0
18632.012.071.036.0NaN
18794.0NaN138.013.0149.0
18865.0101.0123.0128.086.0
18943.094.0NaN29.0132.0
19068.0135.094.028.0125.0
19130.060.098.0NaN15.0
19289.016.010.0135.04.0
193104.0139.097.029.017.0
1945.029.041.099.0NaN
19519.0102.0135.041.040.0
19658.0NaN70.082.064.0
197NaN97.0129.076.013.0
198131.015.0NaN44.0114.0
19979.0NaN95.0128.0NaN
\n", + "

100 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " Python En Math Physic Chem\n", + "100 122.0 10.0 5.0 28.0 57.0\n", + "101 NaN 129.0 16.0 114.0 26.0\n", + "102 97.0 121.0 122.0 29.0 65.0\n", + "103 141.0 73.0 120.0 147.0 1.0\n", + "104 126.0 NaN 86.0 116.0 17.0\n", + "105 85.0 NaN 42.0 121.0 66.0\n", + "106 142.0 65.0 1.0 124.0 83.0\n", + "107 136.0 141.0 NaN 86.0 113.0\n", + "108 15.0 37.0 124.0 110.0 102.0\n", + "109 63.0 30.0 NaN 69.0 58.0\n", + "110 NaN NaN 113.0 109.0 16.0\n", + "111 5.0 51.0 87.0 58.0 126.0\n", + "112 53.0 97.0 76.0 37.0 45.0\n", + "113 42.0 148.0 NaN 97.0 NaN\n", + "114 70.0 138.0 69.0 68.0 134.0\n", + "115 NaN 136.0 113.0 22.0 94.0\n", + "116 31.0 137.0 6.0 20.0 28.0\n", + "117 148.0 74.0 134.0 4.0 124.0\n", + "118 102.0 81.0 138.0 128.0 32.0\n", + "119 27.0 111.0 13.0 NaN 22.0\n", + "120 28.0 93.0 121.0 NaN 4.0\n", + "121 136.0 NaN 25.0 97.0 19.0\n", + "122 111.0 70.0 12.0 38.0 58.0\n", + "123 NaN 103.0 147.0 86.0 8.0\n", + "124 10.0 10.0 46.0 63.0 149.0\n", + "125 7.0 75.0 97.0 108.0 31.0\n", + "126 88.0 6.0 NaN NaN 55.0\n", + "127 33.0 74.0 106.0 50.0 46.0\n", + "128 74.0 28.0 26.0 100.0 76.0\n", + "129 76.0 18.0 101.0 NaN NaN\n", + ".. ... ... ... ... ...\n", + "170 144.0 124.0 77.0 92.0 82.0\n", + "171 36.0 98.0 NaN 43.0 80.0\n", + "172 51.0 NaN 68.0 34.0 74.0\n", + "173 149.0 NaN 18.0 141.0 NaN\n", + "174 8.0 139.0 146.0 112.0 NaN\n", + "175 115.0 NaN 64.0 62.0 9.0\n", + "176 NaN 7.0 140.0 45.0 148.0\n", + "177 NaN 43.0 68.0 109.0 18.0\n", + "178 31.0 100.0 NaN 49.0 123.0\n", + "179 29.0 46.0 69.0 57.0 90.0\n", + "180 146.0 86.0 18.0 22.0 46.0\n", + "181 71.0 50.0 40.0 NaN 140.0\n", + "182 4.0 100.0 147.0 116.0 110.0\n", + "183 55.0 87.0 93.0 NaN 34.0\n", + "184 NaN 109.0 124.0 87.0 82.0\n", + "185 10.0 118.0 139.0 50.0 51.0\n", + "186 32.0 12.0 71.0 36.0 NaN\n", + "187 94.0 NaN 138.0 13.0 149.0\n", + "188 65.0 101.0 123.0 128.0 86.0\n", + "189 43.0 94.0 NaN 29.0 132.0\n", + "190 68.0 135.0 94.0 28.0 125.0\n", + "191 30.0 60.0 98.0 NaN 15.0\n", + "192 89.0 16.0 10.0 135.0 4.0\n", + "193 104.0 139.0 97.0 29.0 17.0\n", + "194 5.0 29.0 41.0 99.0 NaN\n", + "195 19.0 102.0 135.0 41.0 40.0\n", + "196 58.0 NaN 70.0 82.0 64.0\n", + "197 NaN 97.0 129.0 76.0 13.0\n", + "198 131.0 15.0 NaN 44.0 114.0\n", + "199 79.0 NaN 95.0 128.0 NaN\n", + "\n", + "[100 rows x 5 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Python True\n", + "En True\n", + "Math True\n", + "Physic True\n", + "Chem True\n", + "dtype: bool" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isnull().any()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Python 14\n", + "En 14\n", + "Math 15\n", + "Physic 11\n", + "Chem 13\n", + "dtype: int64" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "df2 = df.copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Python 14\n", + "En 14\n", + "Math 15\n", + "Physic 11\n", + "Chem 13\n", + "dtype: int64" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMathPhysicChem
100122.010.05.028.057.0
101100.0129.016.0114.026.0
10297.0121.0122.029.065.0
103141.073.0120.0147.01.0
104126.0100.086.0116.017.0
10585.0100.042.0121.066.0
106142.065.01.0124.083.0
107136.0141.0100.086.0113.0
10815.037.0124.0110.0102.0
10963.030.0100.069.058.0
110100.0100.0113.0109.016.0
1115.051.087.058.0126.0
11253.097.076.037.045.0
11342.0148.0100.097.0100.0
11470.0138.069.068.0134.0
115100.0136.0113.022.094.0
11631.0137.06.020.028.0
117148.074.0134.04.0124.0
118102.081.0138.0128.032.0
11927.0111.013.0100.022.0
12028.093.0121.0100.04.0
121136.0100.025.097.019.0
122111.070.012.038.058.0
123100.0103.0147.086.08.0
12410.010.046.063.0149.0
1257.075.097.0108.031.0
12688.06.0100.0100.055.0
12733.074.0106.050.046.0
12874.028.026.0100.076.0
12976.018.0101.0100.0100.0
..................
170144.0124.077.092.082.0
17136.098.0100.043.080.0
17251.0100.068.034.074.0
173149.0100.018.0141.0100.0
1748.0139.0146.0112.0100.0
175115.0100.064.062.09.0
176100.07.0140.045.0148.0
177100.043.068.0109.018.0
17831.0100.0100.049.0123.0
17929.046.069.057.090.0
180146.086.018.022.046.0
18171.050.040.0100.0140.0
1824.0100.0147.0116.0110.0
18355.087.093.0100.034.0
184100.0109.0124.087.082.0
18510.0118.0139.050.051.0
18632.012.071.036.0100.0
18794.0100.0138.013.0149.0
18865.0101.0123.0128.086.0
18943.094.0100.029.0132.0
19068.0135.094.028.0125.0
19130.060.098.0100.015.0
19289.016.010.0135.04.0
193104.0139.097.029.017.0
1945.029.041.099.0100.0
19519.0102.0135.041.040.0
19658.0100.070.082.064.0
197100.097.0129.076.013.0
198131.015.0100.044.0114.0
19979.0100.095.0128.0100.0
\n", + "

100 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " Python En Math Physic Chem\n", + "100 122.0 10.0 5.0 28.0 57.0\n", + "101 100.0 129.0 16.0 114.0 26.0\n", + "102 97.0 121.0 122.0 29.0 65.0\n", + "103 141.0 73.0 120.0 147.0 1.0\n", + "104 126.0 100.0 86.0 116.0 17.0\n", + "105 85.0 100.0 42.0 121.0 66.0\n", + "106 142.0 65.0 1.0 124.0 83.0\n", + "107 136.0 141.0 100.0 86.0 113.0\n", + "108 15.0 37.0 124.0 110.0 102.0\n", + "109 63.0 30.0 100.0 69.0 58.0\n", + "110 100.0 100.0 113.0 109.0 16.0\n", + "111 5.0 51.0 87.0 58.0 126.0\n", + "112 53.0 97.0 76.0 37.0 45.0\n", + "113 42.0 148.0 100.0 97.0 100.0\n", + "114 70.0 138.0 69.0 68.0 134.0\n", + "115 100.0 136.0 113.0 22.0 94.0\n", + "116 31.0 137.0 6.0 20.0 28.0\n", + "117 148.0 74.0 134.0 4.0 124.0\n", + "118 102.0 81.0 138.0 128.0 32.0\n", + "119 27.0 111.0 13.0 100.0 22.0\n", + "120 28.0 93.0 121.0 100.0 4.0\n", + "121 136.0 100.0 25.0 97.0 19.0\n", + "122 111.0 70.0 12.0 38.0 58.0\n", + "123 100.0 103.0 147.0 86.0 8.0\n", + "124 10.0 10.0 46.0 63.0 149.0\n", + "125 7.0 75.0 97.0 108.0 31.0\n", + "126 88.0 6.0 100.0 100.0 55.0\n", + "127 33.0 74.0 106.0 50.0 46.0\n", + "128 74.0 28.0 26.0 100.0 76.0\n", + "129 76.0 18.0 101.0 100.0 100.0\n", + ".. ... ... ... ... ...\n", + "170 144.0 124.0 77.0 92.0 82.0\n", + "171 36.0 98.0 100.0 43.0 80.0\n", + "172 51.0 100.0 68.0 34.0 74.0\n", + "173 149.0 100.0 18.0 141.0 100.0\n", + "174 8.0 139.0 146.0 112.0 100.0\n", + "175 115.0 100.0 64.0 62.0 9.0\n", + "176 100.0 7.0 140.0 45.0 148.0\n", + "177 100.0 43.0 68.0 109.0 18.0\n", + "178 31.0 100.0 100.0 49.0 123.0\n", + "179 29.0 46.0 69.0 57.0 90.0\n", + "180 146.0 86.0 18.0 22.0 46.0\n", + "181 71.0 50.0 40.0 100.0 140.0\n", + "182 4.0 100.0 147.0 116.0 110.0\n", + "183 55.0 87.0 93.0 100.0 34.0\n", + "184 100.0 109.0 124.0 87.0 82.0\n", + "185 10.0 118.0 139.0 50.0 51.0\n", + "186 32.0 12.0 71.0 36.0 100.0\n", + "187 94.0 100.0 138.0 13.0 149.0\n", + "188 65.0 101.0 123.0 128.0 86.0\n", + "189 43.0 94.0 100.0 29.0 132.0\n", + "190 68.0 135.0 94.0 28.0 125.0\n", + "191 30.0 60.0 98.0 100.0 15.0\n", + "192 89.0 16.0 10.0 135.0 4.0\n", + "193 104.0 139.0 97.0 29.0 17.0\n", + "194 5.0 29.0 41.0 99.0 100.0\n", + "195 19.0 102.0 135.0 41.0 40.0\n", + "196 58.0 100.0 70.0 82.0 64.0\n", + "197 100.0 97.0 129.0 76.0 13.0\n", + "198 131.0 15.0 100.0 44.0 114.0\n", + "199 79.0 100.0 95.0 128.0 100.0\n", + "\n", + "[100 rows x 5 columns]" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 固定值填充\n", + "df2.fillna(value=100)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Python 71.662791\n", + "En 75.627907\n", + "Math 77.929412\n", + "Physic 73.471910\n", + "Chem 69.080460\n", + "dtype: float64" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMathPhysicChem
1001221052857
101711291611426
102971211222965
103141731201471
104126758611617
10585754212166
10614265112483
1071361417786113
1081537124110102
1096330776958
110717511310916
1115518758126
1125397763745
11342148779769
114701386968134
115711361132294
1163113762028
117148741344124
1181028113812832
11927111137322
1202893121734
12113675259719
12211170123858
12371103147868
12410104663149
1257759710831
126886777355
12733741065046
12874282610076
12976181017369
..................
170144124779282
1713698774380
1725175683474
173149751814169
174813914611269
1751157564629
17671714045148
17771436810918
178311007749123
1792946695790
18014686182246
18171504073140
1824100147116110
1835587937334
184711091248782
185101181395051
1863212713669
187947513813149
1886510112312886
18943947729132
190681359428125
1913060987315
1928916101354
193104139972917
194529419969
195191021354140
1965875708264
19771971297613
198131157744114
19979759512869
\n", + "

100 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " Python En Math Physic Chem\n", + "100 122 10 5 28 57\n", + "101 71 129 16 114 26\n", + "102 97 121 122 29 65\n", + "103 141 73 120 147 1\n", + "104 126 75 86 116 17\n", + "105 85 75 42 121 66\n", + "106 142 65 1 124 83\n", + "107 136 141 77 86 113\n", + "108 15 37 124 110 102\n", + "109 63 30 77 69 58\n", + "110 71 75 113 109 16\n", + "111 5 51 87 58 126\n", + "112 53 97 76 37 45\n", + "113 42 148 77 97 69\n", + "114 70 138 69 68 134\n", + "115 71 136 113 22 94\n", + "116 31 137 6 20 28\n", + "117 148 74 134 4 124\n", + "118 102 81 138 128 32\n", + "119 27 111 13 73 22\n", + "120 28 93 121 73 4\n", + "121 136 75 25 97 19\n", + "122 111 70 12 38 58\n", + "123 71 103 147 86 8\n", + "124 10 10 46 63 149\n", + "125 7 75 97 108 31\n", + "126 88 6 77 73 55\n", + "127 33 74 106 50 46\n", + "128 74 28 26 100 76\n", + "129 76 18 101 73 69\n", + ".. ... ... ... ... ...\n", + "170 144 124 77 92 82\n", + "171 36 98 77 43 80\n", + "172 51 75 68 34 74\n", + "173 149 75 18 141 69\n", + "174 8 139 146 112 69\n", + "175 115 75 64 62 9\n", + "176 71 7 140 45 148\n", + "177 71 43 68 109 18\n", + "178 31 100 77 49 123\n", + "179 29 46 69 57 90\n", + "180 146 86 18 22 46\n", + "181 71 50 40 73 140\n", + "182 4 100 147 116 110\n", + "183 55 87 93 73 34\n", + "184 71 109 124 87 82\n", + "185 10 118 139 50 51\n", + "186 32 12 71 36 69\n", + "187 94 75 138 13 149\n", + "188 65 101 123 128 86\n", + "189 43 94 77 29 132\n", + "190 68 135 94 28 125\n", + "191 30 60 98 73 15\n", + "192 89 16 10 135 4\n", + "193 104 139 97 29 17\n", + "194 5 29 41 99 69\n", + "195 19 102 135 41 40\n", + "196 58 75 70 82 64\n", + "197 71 97 129 76 13\n", + "198 131 15 77 44 114\n", + "199 79 75 95 128 69\n", + "\n", + "[100 rows x 5 columns]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 均值\n", + "df3 = df2.fillna(value=df2.mean())\n", + "df3.astype(np.int16)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 6, 18, 1, 17, 19, 5, 17, 16, 13, 3])" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nd = np.random.randint(0,20,size = 10)\n", + "nd" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 1, 3, 5, 6, 13, 16, 17, 17, 18, 19])" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nd.sort()\n", + "nd" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "14.5" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(13 + 16)/2" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "14.5" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.median(nd)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMathPhysicChem
100122.010.05.028.057.0
10168.0129.016.0114.026.0
10297.0121.0122.029.065.0
103141.073.0120.0147.01.0
104126.082.586.0116.017.0
10585.082.542.0121.066.0
106142.065.01.0124.083.0
107136.0141.086.086.0113.0
10815.037.0124.0110.0102.0
10963.030.086.069.058.0
11068.082.5113.0109.016.0
1115.051.087.058.0126.0
11253.097.076.037.045.0
11342.0148.086.097.065.0
11470.0138.069.068.0134.0
11568.0136.0113.022.094.0
11631.0137.06.020.028.0
117148.074.0134.04.0124.0
118102.081.0138.0128.032.0
11927.0111.013.069.022.0
12028.093.0121.069.04.0
121136.082.525.097.019.0
122111.070.012.038.058.0
12368.0103.0147.086.08.0
12410.010.046.063.0149.0
1257.075.097.0108.031.0
12688.06.086.069.055.0
12733.074.0106.050.046.0
12874.028.026.0100.076.0
12976.018.0101.069.065.0
..................
170144.0124.077.092.082.0
17136.098.086.043.080.0
17251.082.568.034.074.0
173149.082.518.0141.065.0
1748.0139.0146.0112.065.0
175115.082.564.062.09.0
17668.07.0140.045.0148.0
17768.043.068.0109.018.0
17831.0100.086.049.0123.0
17929.046.069.057.090.0
180146.086.018.022.046.0
18171.050.040.069.0140.0
1824.0100.0147.0116.0110.0
18355.087.093.069.034.0
18468.0109.0124.087.082.0
18510.0118.0139.050.051.0
18632.012.071.036.065.0
18794.082.5138.013.0149.0
18865.0101.0123.0128.086.0
18943.094.086.029.0132.0
19068.0135.094.028.0125.0
19130.060.098.069.015.0
19289.016.010.0135.04.0
193104.0139.097.029.017.0
1945.029.041.099.065.0
19519.0102.0135.041.040.0
19658.082.570.082.064.0
19768.097.0129.076.013.0
198131.015.086.044.0114.0
19979.082.595.0128.065.0
\n", + "

100 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " Python En Math Physic Chem\n", + "100 122.0 10.0 5.0 28.0 57.0\n", + "101 68.0 129.0 16.0 114.0 26.0\n", + "102 97.0 121.0 122.0 29.0 65.0\n", + "103 141.0 73.0 120.0 147.0 1.0\n", + "104 126.0 82.5 86.0 116.0 17.0\n", + "105 85.0 82.5 42.0 121.0 66.0\n", + "106 142.0 65.0 1.0 124.0 83.0\n", + "107 136.0 141.0 86.0 86.0 113.0\n", + "108 15.0 37.0 124.0 110.0 102.0\n", + "109 63.0 30.0 86.0 69.0 58.0\n", + "110 68.0 82.5 113.0 109.0 16.0\n", + "111 5.0 51.0 87.0 58.0 126.0\n", + "112 53.0 97.0 76.0 37.0 45.0\n", + "113 42.0 148.0 86.0 97.0 65.0\n", + "114 70.0 138.0 69.0 68.0 134.0\n", + "115 68.0 136.0 113.0 22.0 94.0\n", + "116 31.0 137.0 6.0 20.0 28.0\n", + "117 148.0 74.0 134.0 4.0 124.0\n", + "118 102.0 81.0 138.0 128.0 32.0\n", + "119 27.0 111.0 13.0 69.0 22.0\n", + "120 28.0 93.0 121.0 69.0 4.0\n", + "121 136.0 82.5 25.0 97.0 19.0\n", + "122 111.0 70.0 12.0 38.0 58.0\n", + "123 68.0 103.0 147.0 86.0 8.0\n", + "124 10.0 10.0 46.0 63.0 149.0\n", + "125 7.0 75.0 97.0 108.0 31.0\n", + "126 88.0 6.0 86.0 69.0 55.0\n", + "127 33.0 74.0 106.0 50.0 46.0\n", + "128 74.0 28.0 26.0 100.0 76.0\n", + "129 76.0 18.0 101.0 69.0 65.0\n", + ".. ... ... ... ... ...\n", + "170 144.0 124.0 77.0 92.0 82.0\n", + "171 36.0 98.0 86.0 43.0 80.0\n", + "172 51.0 82.5 68.0 34.0 74.0\n", + "173 149.0 82.5 18.0 141.0 65.0\n", + "174 8.0 139.0 146.0 112.0 65.0\n", + "175 115.0 82.5 64.0 62.0 9.0\n", + "176 68.0 7.0 140.0 45.0 148.0\n", + "177 68.0 43.0 68.0 109.0 18.0\n", + "178 31.0 100.0 86.0 49.0 123.0\n", + "179 29.0 46.0 69.0 57.0 90.0\n", + "180 146.0 86.0 18.0 22.0 46.0\n", + "181 71.0 50.0 40.0 69.0 140.0\n", + "182 4.0 100.0 147.0 116.0 110.0\n", + "183 55.0 87.0 93.0 69.0 34.0\n", + "184 68.0 109.0 124.0 87.0 82.0\n", + "185 10.0 118.0 139.0 50.0 51.0\n", + "186 32.0 12.0 71.0 36.0 65.0\n", + "187 94.0 82.5 138.0 13.0 149.0\n", + "188 65.0 101.0 123.0 128.0 86.0\n", + "189 43.0 94.0 86.0 29.0 132.0\n", + "190 68.0 135.0 94.0 28.0 125.0\n", + "191 30.0 60.0 98.0 69.0 15.0\n", + "192 89.0 16.0 10.0 135.0 4.0\n", + "193 104.0 139.0 97.0 29.0 17.0\n", + "194 5.0 29.0 41.0 99.0 65.0\n", + "195 19.0 102.0 135.0 41.0 40.0\n", + "196 58.0 82.5 70.0 82.0 64.0\n", + "197 68.0 97.0 129.0 76.0 13.0\n", + "198 131.0 15.0 86.0 44.0 114.0\n", + "199 79.0 82.5 95.0 128.0 65.0\n", + "\n", + "[100 rows x 5 columns]" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 中位数填充\n", + "df2.median()\n", + "df4 = df2.fillna(df2.median())\n", + "df4" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMathPhysicChem
100122.010.05.028.057.0
101NaN129.016.0114.026.0
10297.0121.0122.029.065.0
103141.073.0120.0147.01.0
104126.0NaN86.0116.017.0
10585.0NaN42.0121.066.0
106142.065.01.0124.083.0
107136.0141.0NaN86.0113.0
10815.037.0124.0110.0102.0
10963.030.0NaN69.058.0
110NaNNaN113.0109.016.0
1115.051.087.058.0126.0
11253.097.076.037.045.0
11342.0148.0NaN97.0NaN
11470.0138.069.068.0134.0
115NaN136.0113.022.094.0
11631.0137.06.020.028.0
117148.074.0134.04.0124.0
118102.081.0138.0128.032.0
11927.0111.013.0NaN22.0
12028.093.0121.0NaN4.0
121136.0NaN25.097.019.0
122111.070.012.038.058.0
123NaN103.0147.086.08.0
12410.010.046.063.0149.0
1257.075.097.0108.031.0
12688.06.0NaNNaN55.0
12733.074.0106.050.046.0
12874.028.026.0100.076.0
12976.018.0101.0NaNNaN
..................
170144.0124.077.092.082.0
17136.098.0NaN43.080.0
17251.0NaN68.034.074.0
173149.0NaN18.0141.0NaN
1748.0139.0146.0112.0NaN
175115.0NaN64.062.09.0
176NaN7.0140.045.0148.0
177NaN43.068.0109.018.0
17831.0100.0NaN49.0123.0
17929.046.069.057.090.0
180146.086.018.022.046.0
18171.050.040.0NaN140.0
1824.0100.0147.0116.0110.0
18355.087.093.0NaN34.0
184NaN109.0124.087.082.0
18510.0118.0139.050.051.0
18632.012.071.036.0NaN
18794.0NaN138.013.0149.0
18865.0101.0123.0128.086.0
18943.094.0NaN29.0132.0
19068.0135.094.028.0125.0
19130.060.098.0NaN15.0
19289.016.010.0135.04.0
193104.0139.097.029.017.0
1945.029.041.099.0NaN
19519.0102.0135.041.040.0
19658.0NaN70.082.064.0
197NaN97.0129.076.013.0
198131.015.0NaN44.0114.0
19979.0NaN95.0128.0NaN
\n", + "

100 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " Python En Math Physic Chem\n", + "100 122.0 10.0 5.0 28.0 57.0\n", + "101 NaN 129.0 16.0 114.0 26.0\n", + "102 97.0 121.0 122.0 29.0 65.0\n", + "103 141.0 73.0 120.0 147.0 1.0\n", + "104 126.0 NaN 86.0 116.0 17.0\n", + "105 85.0 NaN 42.0 121.0 66.0\n", + "106 142.0 65.0 1.0 124.0 83.0\n", + "107 136.0 141.0 NaN 86.0 113.0\n", + "108 15.0 37.0 124.0 110.0 102.0\n", + "109 63.0 30.0 NaN 69.0 58.0\n", + "110 NaN NaN 113.0 109.0 16.0\n", + "111 5.0 51.0 87.0 58.0 126.0\n", + "112 53.0 97.0 76.0 37.0 45.0\n", + "113 42.0 148.0 NaN 97.0 NaN\n", + "114 70.0 138.0 69.0 68.0 134.0\n", + "115 NaN 136.0 113.0 22.0 94.0\n", + "116 31.0 137.0 6.0 20.0 28.0\n", + "117 148.0 74.0 134.0 4.0 124.0\n", + "118 102.0 81.0 138.0 128.0 32.0\n", + "119 27.0 111.0 13.0 NaN 22.0\n", + "120 28.0 93.0 121.0 NaN 4.0\n", + "121 136.0 NaN 25.0 97.0 19.0\n", + "122 111.0 70.0 12.0 38.0 58.0\n", + "123 NaN 103.0 147.0 86.0 8.0\n", + "124 10.0 10.0 46.0 63.0 149.0\n", + "125 7.0 75.0 97.0 108.0 31.0\n", + "126 88.0 6.0 NaN NaN 55.0\n", + "127 33.0 74.0 106.0 50.0 46.0\n", + "128 74.0 28.0 26.0 100.0 76.0\n", + "129 76.0 18.0 101.0 NaN NaN\n", + ".. ... ... ... ... ...\n", + "170 144.0 124.0 77.0 92.0 82.0\n", + "171 36.0 98.0 NaN 43.0 80.0\n", + "172 51.0 NaN 68.0 34.0 74.0\n", + "173 149.0 NaN 18.0 141.0 NaN\n", + "174 8.0 139.0 146.0 112.0 NaN\n", + "175 115.0 NaN 64.0 62.0 9.0\n", + "176 NaN 7.0 140.0 45.0 148.0\n", + "177 NaN 43.0 68.0 109.0 18.0\n", + "178 31.0 100.0 NaN 49.0 123.0\n", + "179 29.0 46.0 69.0 57.0 90.0\n", + "180 146.0 86.0 18.0 22.0 46.0\n", + "181 71.0 50.0 40.0 NaN 140.0\n", + "182 4.0 100.0 147.0 116.0 110.0\n", + "183 55.0 87.0 93.0 NaN 34.0\n", + "184 NaN 109.0 124.0 87.0 82.0\n", + "185 10.0 118.0 139.0 50.0 51.0\n", + "186 32.0 12.0 71.0 36.0 NaN\n", + "187 94.0 NaN 138.0 13.0 149.0\n", + "188 65.0 101.0 123.0 128.0 86.0\n", + "189 43.0 94.0 NaN 29.0 132.0\n", + "190 68.0 135.0 94.0 28.0 125.0\n", + "191 30.0 60.0 98.0 NaN 15.0\n", + "192 89.0 16.0 10.0 135.0 4.0\n", + "193 104.0 139.0 97.0 29.0 17.0\n", + "194 5.0 29.0 41.0 99.0 NaN\n", + "195 19.0 102.0 135.0 41.0 40.0\n", + "196 58.0 NaN 70.0 82.0 64.0\n", + "197 NaN 97.0 129.0 76.0 13.0\n", + "198 131.0 15.0 NaN 44.0 114.0\n", + "199 79.0 NaN 95.0 128.0 NaN\n", + "\n", + "[100 rows x 5 columns]" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 众数填充,数量最多的那个数\n", + "df2" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMathPhysicChem
100828999101125
101431109325
10256103566190
1034710014713899
1043846827544
10518111223126
106562610614139
10731377567144
10835471026063
109861265788149
11019140303533
11176151133
11231549111969
1136437502321
11472571381521
115551201043225
11696248922146
11763086489
11828461258274
119853970132111
12010990447439
121214810311465
12211029998057
123109888113571
12470103134121121
12551921172743
1266929759105
12765905214822
12841291711913
1292410010728139
..................
207012777241631
2071936192822
20721166154861
207347214011234
2074261081233233
207546130135124113
207633181363820
20771071112954119
207884551293787
20799550451984
2080124746514053
20812635149145127
20821921101389
20838410131714
208428741056889
20852393849788
2086861332612513
208721124401155
20882015353137
208996123123564
20902243927860
20911631176058
20926518131334
209369491094058
2094128461082111
20952659854149
209611147909266
209759773140104
2098102675119
209997197714348
\n", + "

2000 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " Python En Math Physic Chem\n", + "100 82 89 99 101 125\n", + "101 4 31 109 32 5\n", + "102 56 103 56 61 90\n", + "103 47 100 147 138 99\n", + "104 38 46 82 75 44\n", + "105 18 11 122 3 126\n", + "106 56 26 106 14 139\n", + "107 3 137 75 67 144\n", + "108 35 47 102 60 63\n", + "109 86 126 57 88 149\n", + "110 19 140 30 35 33\n", + "111 76 1 5 11 33\n", + "112 31 54 91 119 69\n", + "113 64 37 50 23 21\n", + "114 72 57 138 15 21\n", + "115 55 120 104 32 25\n", + "116 96 24 89 22 146\n", + "117 63 0 8 64 89\n", + "118 28 46 125 82 74\n", + "119 85 39 70 132 111\n", + "120 109 90 44 74 39\n", + "121 2 148 103 114 65\n", + "122 110 29 99 80 57\n", + "123 109 88 81 135 71\n", + "124 70 103 134 121 121\n", + "125 51 92 117 27 43\n", + "126 6 92 97 59 105\n", + "127 65 90 52 148 22\n", + "128 4 129 17 119 13\n", + "129 24 100 107 28 139\n", + "... ... ... ... ... ...\n", + "2070 127 77 24 16 31\n", + "2071 93 61 9 28 22\n", + "2072 116 61 54 8 61\n", + "2073 4 72 140 112 34\n", + "2074 26 108 123 32 33\n", + "2075 46 130 135 124 113\n", + "2076 33 18 136 38 20\n", + "2077 107 11 129 54 119\n", + "2078 84 55 129 37 87\n", + "2079 95 50 45 19 84\n", + "2080 124 74 65 140 53\n", + "2081 26 35 149 145 127\n", + "2082 19 21 101 3 89\n", + "2083 84 10 131 71 4\n", + "2084 28 74 105 68 89\n", + "2085 23 93 84 97 88\n", + "2086 86 133 26 125 13\n", + "2087 21 124 40 115 5\n", + "2088 20 15 35 31 37\n", + "2089 96 123 123 5 64\n", + "2090 22 43 92 78 60\n", + "2091 16 31 17 60 58\n", + "2092 65 18 13 13 34\n", + "2093 69 49 109 40 58\n", + "2094 128 46 10 82 111\n", + "2095 26 59 8 54 149\n", + "2096 111 47 90 92 66\n", + "2097 5 97 73 140 104\n", + "2098 102 6 7 5 119\n", + "2099 97 19 77 143 48\n", + "\n", + "[2000 rows x 5 columns]" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = DataFrame(np.random.randint(0,150,size = (2000,5)),index = np.arange(100,2100),columns=['Python','En','Math','Physic','Chem'])\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "for i in range(1000):\n", + " # 行索引\n", + " index = np.random.randint(100,2100,size =1)[0]\n", + "\n", + " cols = df.columns\n", + "\n", + " # 列索引\n", + " col = np.random.choice(cols)\n", + "\n", + " df.loc[index,col] = None" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Python 190\n", + "En 200\n", + "Math 194\n", + "Physic 189\n", + "Chem 181\n", + "dtype: int64" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMathPhysicChem
10082.089.099.0101.0125.0
1014.031.0109.032.05.0
10256.0103.056.0NaN90.0
10347.0100.0147.0138.099.0
10438.046.0NaN75.044.0
\n", + "
" + ], + "text/plain": [ + " Python En Math Physic Chem\n", + "100 82.0 89.0 99.0 101.0 125.0\n", + "101 4.0 31.0 109.0 32.0 5.0\n", + "102 56.0 103.0 56.0 NaN 90.0\n", + "103 47.0 100.0 147.0 138.0 99.0\n", + "104 38.0 46.0 NaN 75.0 44.0" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMathPhysicChem
209526.059.08.054.0149.0
2096NaN47.090.092.066.0
20975.097.073.0140.0104.0
2098102.06.07.05.0119.0
209997.019.077.0NaN48.0
\n", + "
" + ], + "text/plain": [ + " Python En Math Physic Chem\n", + "2095 26.0 59.0 8.0 54.0 149.0\n", + "2096 NaN 47.0 90.0 92.0 66.0\n", + "2097 5.0 97.0 73.0 140.0 104.0\n", + "2098 102.0 6.0 7.0 5.0 119.0\n", + "2099 97.0 19.0 77.0 NaN 48.0" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 82., 4., 56., 47., 38., 18., 3., 35., 86., 19., 76.,\n", + " 31., 64., 72., 55., 96., 63., 28., 85., 109., 2., 110.,\n", + " 70., 51., 6., 65., 24., 48., 44., 11., 114., 129., 87.,\n", + " 108., 125., nan, 140., 132., 91., 34., 54., 30., 12., 98.,\n", + " 142., 79., 13., 77., 40., 139., 39., 81., 112., 36., 22.,\n", + " 5., 120., 17., 127., 119., 59., 146., 89., 103., 8., 97.,\n", + " 130., 73., 83., 122., 95., 100., 41., 21., 136., 80., 101.,\n", + " 50., 27., 71., 16., 141., 126., 102., 145., 15., 52., 94.,\n", + " 10., 33., 137., 9., 128., 88., 26., 84., 93., 1., 7.,\n", + " 131., 107., 148., 0., 105., 66., 32., 115., 118., 58., 53.,\n", + " 29., 42., 57., 62., 25., 60., 69., 133., 68., 20., 106.,\n", + " 147., 78., 90., 124., 149., 92., 75., 117., 143., 99., 37.,\n", + " 123., 45., 61., 121., 135., 138., 116., 14., 104., 74., 46.,\n", + " 111., 23., 43., 49., 144., 113., 67., 134.])" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 去重之后的数据\n", + "df['Python'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "143.0 20\n", + "136.0 20\n", + "102.0 19\n", + "105.0 19\n", + "26.0 19\n", + "69.0 19\n", + "31.0 18\n", + "148.0 18\n", + "75.0 18\n", + "139.0 18\n", + "1.0 18\n", + "35.0 17\n", + "140.0 17\n", + "110.0 17\n", + "125.0 17\n", + "146.0 17\n", + "141.0 17\n", + "64.0 16\n", + "30.0 16\n", + "79.0 16\n", + "73.0 16\n", + "40.0 16\n", + "10.0 15\n", + "6.0 15\n", + "65.0 15\n", + "81.0 15\n", + "28.0 15\n", + "48.0 15\n", + "92.0 15\n", + "103.0 15\n", + " ..\n", + "104.0 9\n", + "12.0 9\n", + "116.0 9\n", + "13.0 9\n", + "59.0 9\n", + "93.0 9\n", + "124.0 9\n", + "85.0 8\n", + "135.0 8\n", + "131.0 8\n", + "68.0 8\n", + "66.0 8\n", + "62.0 8\n", + "120.0 8\n", + "17.0 8\n", + "25.0 8\n", + "145.0 7\n", + "58.0 7\n", + "134.0 7\n", + "113.0 7\n", + "123.0 7\n", + "39.0 7\n", + "34.0 7\n", + "43.0 7\n", + "74.0 6\n", + "144.0 6\n", + "132.0 6\n", + "142.0 5\n", + "67.0 5\n", + "49.0 5\n", + "Name: Python, Length: 150, dtype: int64" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Python'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "8.0 21\n", + "96.0 19\n", + "118.0 19\n", + "24.0 19\n", + "43.0 19\n", + "27.0 19\n", + "19.0 19\n", + "41.0 18\n", + "0.0 18\n", + "3.0 18\n", + "52.0 18\n", + "4.0 17\n", + "137.0 17\n", + "1.0 17\n", + "101.0 17\n", + "51.0 17\n", + "39.0 17\n", + "100.0 17\n", + "127.0 17\n", + "115.0 16\n", + "33.0 16\n", + "112.0 16\n", + "92.0 16\n", + "126.0 16\n", + "133.0 15\n", + "32.0 15\n", + "89.0 15\n", + "95.0 15\n", + "36.0 15\n", + "93.0 15\n", + " ..\n", + "12.0 9\n", + "28.0 9\n", + "106.0 9\n", + "45.0 9\n", + "80.0 9\n", + "84.0 9\n", + "58.0 9\n", + "79.0 9\n", + "71.0 9\n", + "83.0 9\n", + "142.0 9\n", + "7.0 9\n", + "6.0 8\n", + "61.0 8\n", + "149.0 8\n", + "34.0 8\n", + "20.0 8\n", + "38.0 8\n", + "130.0 8\n", + "104.0 7\n", + "120.0 7\n", + "56.0 7\n", + "146.0 7\n", + "98.0 7\n", + "134.0 6\n", + "123.0 6\n", + "35.0 6\n", + "87.0 5\n", + "42.0 5\n", + "119.0 4\n", + "Name: En, Length: 150, dtype: int64" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "en = df['En'].value_counts()\n", + "en" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "8.0" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "en.index[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Python 75.0\n", + "En 74.0\n", + "Math 77.5\n", + "Physic 73.0\n", + "Chem 72.0\n", + "dtype: float64 \n" + ] + } + ], + "source": [ + "s = df.median()\n", + "print(s,type(s))" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "zhongshu = []\n", + "for col in df.columns:\n", + " zhongshu.append(df[col].value_counts().index[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Python 143.0\n", + "En 8.0\n", + "Math 80.0\n", + "Physic 31.0\n", + "Chem 125.0\n", + "dtype: float64" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s = Series(zhongshu,index = df.columns)\n", + "s" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMathPhysicChem
10082.089.099.0101.0125.0
1014.031.0109.032.05.0
10256.0103.056.031.090.0
10347.0100.0147.0138.099.0
10438.046.080.075.044.0
10518.011.0122.03.0126.0
10656.026.0106.014.0139.0
1073.0137.075.067.0144.0
10835.047.0102.060.063.0
10986.0126.080.088.0149.0
11019.0140.080.035.033.0
11176.08.05.011.033.0
11231.054.091.0119.069.0
11364.037.050.023.021.0
11472.057.0138.015.021.0
11555.0120.0104.032.025.0
11696.024.089.031.0146.0
11763.08.08.064.089.0
11828.08.0125.082.074.0
11985.039.070.0132.0111.0
120109.090.080.074.039.0
1212.08.0103.0114.065.0
122110.029.099.080.057.0
123109.088.081.0135.071.0
12470.0103.0134.0121.0121.0
12551.092.0117.031.043.0
1266.092.097.059.0105.0
12765.090.052.0148.022.0
1284.0129.017.0119.013.0
12924.0100.0107.028.0139.0
..................
2070127.077.024.016.0125.0
207193.061.09.028.022.0
2072116.061.054.08.061.0
20734.072.0140.031.034.0
2074143.0108.0123.032.033.0
207546.08.0135.0124.0113.0
2076143.018.0136.038.0125.0
2077143.011.0129.054.0119.0
207884.055.0129.037.087.0
207995.050.045.019.084.0
2080124.074.065.031.053.0
208126.035.0149.0145.0127.0
208219.021.0101.03.089.0
208384.08.0131.071.04.0
208428.074.0105.068.089.0
208523.093.084.097.088.0
208686.0133.026.0125.013.0
208721.0124.040.031.05.0
208820.015.035.031.037.0
208996.0123.0123.05.064.0
209022.043.092.078.060.0
209116.031.017.060.058.0
209265.018.013.013.034.0
209369.049.0109.040.058.0
2094128.046.010.082.0111.0
209526.059.08.054.0149.0
2096143.047.090.092.066.0
20975.097.073.0140.0104.0
2098102.06.07.05.0119.0
209997.019.077.031.048.0
\n", + "

2000 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " Python En Math Physic Chem\n", + "100 82.0 89.0 99.0 101.0 125.0\n", + "101 4.0 31.0 109.0 32.0 5.0\n", + "102 56.0 103.0 56.0 31.0 90.0\n", + "103 47.0 100.0 147.0 138.0 99.0\n", + "104 38.0 46.0 80.0 75.0 44.0\n", + "105 18.0 11.0 122.0 3.0 126.0\n", + "106 56.0 26.0 106.0 14.0 139.0\n", + "107 3.0 137.0 75.0 67.0 144.0\n", + "108 35.0 47.0 102.0 60.0 63.0\n", + "109 86.0 126.0 80.0 88.0 149.0\n", + "110 19.0 140.0 80.0 35.0 33.0\n", + "111 76.0 8.0 5.0 11.0 33.0\n", + "112 31.0 54.0 91.0 119.0 69.0\n", + "113 64.0 37.0 50.0 23.0 21.0\n", + "114 72.0 57.0 138.0 15.0 21.0\n", + "115 55.0 120.0 104.0 32.0 25.0\n", + "116 96.0 24.0 89.0 31.0 146.0\n", + "117 63.0 8.0 8.0 64.0 89.0\n", + "118 28.0 8.0 125.0 82.0 74.0\n", + "119 85.0 39.0 70.0 132.0 111.0\n", + "120 109.0 90.0 80.0 74.0 39.0\n", + "121 2.0 8.0 103.0 114.0 65.0\n", + "122 110.0 29.0 99.0 80.0 57.0\n", + "123 109.0 88.0 81.0 135.0 71.0\n", + "124 70.0 103.0 134.0 121.0 121.0\n", + "125 51.0 92.0 117.0 31.0 43.0\n", + "126 6.0 92.0 97.0 59.0 105.0\n", + "127 65.0 90.0 52.0 148.0 22.0\n", + "128 4.0 129.0 17.0 119.0 13.0\n", + "129 24.0 100.0 107.0 28.0 139.0\n", + "... ... ... ... ... ...\n", + "2070 127.0 77.0 24.0 16.0 125.0\n", + "2071 93.0 61.0 9.0 28.0 22.0\n", + "2072 116.0 61.0 54.0 8.0 61.0\n", + "2073 4.0 72.0 140.0 31.0 34.0\n", + "2074 143.0 108.0 123.0 32.0 33.0\n", + "2075 46.0 8.0 135.0 124.0 113.0\n", + "2076 143.0 18.0 136.0 38.0 125.0\n", + "2077 143.0 11.0 129.0 54.0 119.0\n", + "2078 84.0 55.0 129.0 37.0 87.0\n", + "2079 95.0 50.0 45.0 19.0 84.0\n", + "2080 124.0 74.0 65.0 31.0 53.0\n", + "2081 26.0 35.0 149.0 145.0 127.0\n", + "2082 19.0 21.0 101.0 3.0 89.0\n", + "2083 84.0 8.0 131.0 71.0 4.0\n", + "2084 28.0 74.0 105.0 68.0 89.0\n", + "2085 23.0 93.0 84.0 97.0 88.0\n", + "2086 86.0 133.0 26.0 125.0 13.0\n", + "2087 21.0 124.0 40.0 31.0 5.0\n", + "2088 20.0 15.0 35.0 31.0 37.0\n", + "2089 96.0 123.0 123.0 5.0 64.0\n", + "2090 22.0 43.0 92.0 78.0 60.0\n", + "2091 16.0 31.0 17.0 60.0 58.0\n", + "2092 65.0 18.0 13.0 13.0 34.0\n", + "2093 69.0 49.0 109.0 40.0 58.0\n", + "2094 128.0 46.0 10.0 82.0 111.0\n", + "2095 26.0 59.0 8.0 54.0 149.0\n", + "2096 143.0 47.0 90.0 92.0 66.0\n", + "2097 5.0 97.0 73.0 140.0 104.0\n", + "2098 102.0 6.0 7.0 5.0 119.0\n", + "2099 97.0 19.0 77.0 31.0 48.0\n", + "\n", + "[2000 rows x 5 columns]" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2 = df.fillna(s)\n", + "df2" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Python 0\n", + "En 0\n", + "Math 0\n", + "Physic 0\n", + "Chem 0\n", + "dtype: int64" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Python 190\n", + "En 200\n", + "Math 194\n", + "Physic 189\n", + "Chem 181\n", + "dtype: int64" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMathPhysicChem
10082.089.099.0101.0125.0
1014.031.0109.032.05.0
10256.0103.056.0NaN90.0
10347.0100.0147.0138.099.0
10438.046.0NaN75.044.0
10518.011.0122.03.0126.0
10656.026.0106.014.0139.0
1073.0137.075.067.0144.0
10835.047.0102.060.063.0
10986.0126.0NaN88.0149.0
11019.0140.0NaN35.033.0
11176.0NaN5.011.033.0
11231.054.091.0119.069.0
11364.037.050.023.021.0
11472.057.0138.015.021.0
11555.0120.0104.032.025.0
11696.024.089.0NaN146.0
11763.0NaN8.064.089.0
11828.0NaN125.082.074.0
11985.039.070.0132.0111.0
\n", + "
" + ], + "text/plain": [ + " Python En Math Physic Chem\n", + "100 82.0 89.0 99.0 101.0 125.0\n", + "101 4.0 31.0 109.0 32.0 5.0\n", + "102 56.0 103.0 56.0 NaN 90.0\n", + "103 47.0 100.0 147.0 138.0 99.0\n", + "104 38.0 46.0 NaN 75.0 44.0\n", + "105 18.0 11.0 122.0 3.0 126.0\n", + "106 56.0 26.0 106.0 14.0 139.0\n", + "107 3.0 137.0 75.0 67.0 144.0\n", + "108 35.0 47.0 102.0 60.0 63.0\n", + "109 86.0 126.0 NaN 88.0 149.0\n", + "110 19.0 140.0 NaN 35.0 33.0\n", + "111 76.0 NaN 5.0 11.0 33.0\n", + "112 31.0 54.0 91.0 119.0 69.0\n", + "113 64.0 37.0 50.0 23.0 21.0\n", + "114 72.0 57.0 138.0 15.0 21.0\n", + "115 55.0 120.0 104.0 32.0 25.0\n", + "116 96.0 24.0 89.0 NaN 146.0\n", + "117 63.0 NaN 8.0 64.0 89.0\n", + "118 28.0 NaN 125.0 82.0 74.0\n", + "119 85.0 39.0 70.0 132.0 111.0" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df3 = df.iloc[:20]\n", + "df3" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMathPhysicChem
10082.089.099.0101.0125.0
1014.031.0109.032.05.0
10256.0103.056.090.090.0
10347.0100.0147.0138.099.0
10438.046.075.075.044.0
10518.011.0122.03.0126.0
10656.026.0106.014.0139.0
1073.0137.075.067.0144.0
10835.047.0102.060.063.0
10986.0126.088.088.0149.0
11019.0140.035.035.033.0
11176.05.05.011.033.0
11231.054.091.0119.069.0
11364.037.050.023.021.0
11472.057.0138.015.021.0
11555.0120.0104.032.025.0
11696.024.089.0146.0146.0
11763.08.08.064.089.0
11828.0125.0125.082.074.0
11985.039.070.0132.0111.0
\n", + "
" + ], + "text/plain": [ + " Python En Math Physic Chem\n", + "100 82.0 89.0 99.0 101.0 125.0\n", + "101 4.0 31.0 109.0 32.0 5.0\n", + "102 56.0 103.0 56.0 90.0 90.0\n", + "103 47.0 100.0 147.0 138.0 99.0\n", + "104 38.0 46.0 75.0 75.0 44.0\n", + "105 18.0 11.0 122.0 3.0 126.0\n", + "106 56.0 26.0 106.0 14.0 139.0\n", + "107 3.0 137.0 75.0 67.0 144.0\n", + "108 35.0 47.0 102.0 60.0 63.0\n", + "109 86.0 126.0 88.0 88.0 149.0\n", + "110 19.0 140.0 35.0 35.0 33.0\n", + "111 76.0 5.0 5.0 11.0 33.0\n", + "112 31.0 54.0 91.0 119.0 69.0\n", + "113 64.0 37.0 50.0 23.0 21.0\n", + "114 72.0 57.0 138.0 15.0 21.0\n", + "115 55.0 120.0 104.0 32.0 25.0\n", + "116 96.0 24.0 89.0 146.0 146.0\n", + "117 63.0 8.0 8.0 64.0 89.0\n", + "118 28.0 125.0 125.0 82.0 74.0\n", + "119 85.0 39.0 70.0 132.0 111.0" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "'''method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None\n", + " Method to use for filling holes in reindexed Series\n", + " pad / ffill: propagate last valid observation forward to next valid\n", + " backfill / bfill: use NEXT valid observation to fill gap'''\n", + "df3.fillna(method='bfill',axis = 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2000, 5)" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#数据量足够大,空数据比较少,直接删除\n", + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.dro" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git "a/Day76-90/code/4-pandas\345\244\232\345\261\202\347\264\242\345\274\225.ipynb" "b/Day76-90/code/4-pandas\345\244\232\345\261\202\347\264\242\345\274\225.ipynb" new file mode 100644 index 0000000..01f9dd6 --- /dev/null +++ "b/Day76-90/code/4-pandas\345\244\232\345\261\202\347\264\242\345\274\225.ipynb" @@ -0,0 +1,568 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "import pandas as pd\n", + "# 数据分析BI-------->人工智能AI\n", + "# 数据分析和数据挖掘一个意思,\n", + "# 工具和软件:Excel 免费版\n", + "# SPSS(一人一年10000)、SAS(一人一年5000)、Matlab 收费\n", + "# R、Python(全方位语言,流行) 免费\n", + "# Python + numpy + scipy + pandas + matplotlib + seaborn + pyEcharts + sklearn + kereas(Tensorflow)+…… \n", + "# 代码,自动化(数据输入----输出结果)\n", + "from pandas import Series,DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "a 63\n", + "b 107\n", + "c 16\n", + "d 35\n", + "e 140\n", + "f 83\n", + "dtype: int32" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 多层索引,行列\n", + "# 单层索引\n", + "s = Series(np.random.randint(0,150,size = 6),index=list('abcdef'))\n", + "s" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "张三 期中 114\n", + " 期末 131\n", + "李四 期中 3\n", + " 期末 63\n", + "王五 期中 107\n", + " 期末 34\n", + "dtype: int32" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 多层索引,两层,三层以上(规则一样)\n", + "s2 = Series(np.random.randint(0,150,size = 6),index = pd.MultiIndex.from_product([['张三','李四','王五'],['期中','期末']]))\n", + "s2" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMath
张三期中73525
期末373656
李四期中14981142
期末711380
王五期中1194103
期末2512183
\n", + "
" + ], + "text/plain": [ + " Python En Math\n", + "张三 期中 73 5 25\n", + " 期末 37 36 56\n", + "李四 期中 149 81 142\n", + " 期末 71 138 0\n", + "王五 期中 11 94 103\n", + " 期末 25 121 83" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = DataFrame(np.random.randint(0,150,size = (6,3)),columns=['Python','En','Math'],index =pd.MultiIndex.from_product([['张三','李四','王五'],['期中','期末']]) )\n", + "\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMath
张三期中A153117
B8256123
期末A14278
B695017
李四期中A9187143
B12011839
期末A567655
B11105121
王五期中A147781
B128126146
期末A4945114
B1212677
\n", + "
" + ], + "text/plain": [ + " Python En Math\n", + "张三 期中 A 15 31 17\n", + " B 82 56 123\n", + " 期末 A 14 2 78\n", + " B 69 50 17\n", + "李四 期中 A 91 87 143\n", + " B 120 118 39\n", + " 期末 A 56 76 55\n", + " B 11 105 121\n", + "王五 期中 A 147 78 1\n", + " B 128 126 146\n", + " 期末 A 49 45 114\n", + " B 121 26 77" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 三层索引\n", + "df3 = DataFrame(np.random.randint(0,150,size = (12,3)),columns=['Python','En','Math'],index =pd.MultiIndex.from_product([['张三','李四','王五'],['期中','期末'],['A','B']]) )\n", + "\n", + "df3" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "73" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 先获取列后获取行\n", + "df['Python']['张三']['期中']" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "df2 = df.copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMath
张三期中73525
期末373656
李四期中14981142
期末711380
王五期中1194103
期末2512183
\n", + "
" + ], + "text/plain": [ + " Python En Math\n", + "张三 期中 73 5 25\n", + " 期末 37 36 56\n", + "李四 期中 149 81 142\n", + " 期末 71 138 0\n", + "王五 期中 11 94 103\n", + " 期末 25 121 83" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2.sort_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "73" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 先获取行,后获取列\n", + "df.loc['张三'].loc['期中']['Python']" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMath
张三期中73525
期末373656
\n", + "
" + ], + "text/plain": [ + " Python En Math\n", + "张三 期中 73 5 25\n", + " 期末 37 36 56" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[[0,1]]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git "a/Day76-90/code/5-pandas\345\244\232\345\261\202\347\264\242\345\274\225\350\256\241\347\256\227.ipynb" "b/Day76-90/code/5-pandas\345\244\232\345\261\202\347\264\242\345\274\225\350\256\241\347\256\227.ipynb" new file mode 100644 index 0000000..22e1c8e --- /dev/null +++ "b/Day76-90/code/5-pandas\345\244\232\345\261\202\347\264\242\345\274\225\350\256\241\347\256\227.ipynb" @@ -0,0 +1,1000 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "import pandas as pd\n", + "\n", + "from pandas import Series,DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMath
期中期末期中期末期中期末
A1311011731517
B6234531012457
C247636117123105
D11246794246122
E661131044510108
F11110844113221
\n", + "
" + ], + "text/plain": [ + " Python En Math \n", + " 期中 期末 期中 期末 期中 期末\n", + "A 131 101 1 73 15 17\n", + "B 62 34 53 101 24 57\n", + "C 24 76 36 117 123 105\n", + "D 112 46 79 42 46 122\n", + "E 66 113 104 45 10 108\n", + "F 111 108 4 41 132 21" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 多层列索引\n", + "df = DataFrame(np.random.randint(0,150,size = (6,6)),index = list('ABCDEF'),\n", + " columns=pd.MultiIndex.from_product([['Python','En','Math'],['期中','期末']]))\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Python 期中 84.3\n", + " 期末 79.7\n", + "En 期中 46.2\n", + " 期末 69.8\n", + "Math 期中 58.3\n", + " 期末 71.7\n", + "dtype: float64" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# round保留2位小数\n", + "df.mean().round(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMath
期中期末期中期末期中期末
A1311011731517
B6234531012457
C247636117123105
D11246794246122
E661131044510108
F11110844113221
\n", + "
" + ], + "text/plain": [ + " Python En Math \n", + " 期中 期末 期中 期末 期中 期末\n", + "A 131 101 1 73 15 17\n", + "B 62 34 53 101 24 57\n", + "C 24 76 36 117 123 105\n", + "D 112 46 79 42 46 122\n", + "E 66 113 104 45 10 108\n", + "F 111 108 4 41 132 21" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMath
A116.037.016.0
B48.077.040.5
C50.076.5114.0
D79.060.584.0
E89.574.559.0
F109.522.576.5
\n", + "
" + ], + "text/plain": [ + " Python En Math\n", + "A 116.0 37.0 16.0\n", + "B 48.0 77.0 40.5\n", + "C 50.0 76.5 114.0\n", + "D 79.0 60.5 84.0\n", + "E 89.5 74.5 59.0\n", + "F 109.5 22.5 76.5" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# axis = 0代表行\n", + "# axis = 1代表列\n", + "df.mean(axis = 1,level = 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
期中期末
A49.063.7
B46.364.0
C61.099.3
D79.070.0
E60.088.7
F82.356.7
\n", + "
" + ], + "text/plain": [ + " 期中 期末\n", + "A 49.0 63.7\n", + "B 46.3 64.0\n", + "C 61.0 99.3\n", + "D 79.0 70.0\n", + "E 60.0 88.7\n", + "F 82.3 56.7" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.mean(axis = 1,level = 1).round(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonEnMath
期中期末期中期末期中期末
A1311011731517
B6234531012457
C247636117123105
D11246794246122
E661131044510108
F11110844113221
\n", + "
" + ], + "text/plain": [ + " Python En Math \n", + " 期中 期末 期中 期末 期中 期末\n", + "A 131 101 1 73 15 17\n", + "B 62 34 53 101 24 57\n", + "C 24 76 36 117 123 105\n", + "D 112 46 79 42 46 122\n", + "E 66 113 104 45 10 108\n", + "F 111 108 4 41 132 21" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EnMathPython
A期中115131
期末7317101
B期中532462
期末1015734
C期中3612324
期末11710576
D期中7946112
期末4212246
E期中1041066
期末45108113
F期中4132111
期末4121108
\n", + "
" + ], + "text/plain": [ + " En Math Python\n", + "A 期中 1 15 131\n", + " 期末 73 17 101\n", + "B 期中 53 24 62\n", + " 期末 101 57 34\n", + "C 期中 36 123 24\n", + " 期末 117 105 76\n", + "D 期中 79 46 112\n", + " 期末 42 122 46\n", + "E 期中 104 10 66\n", + " 期末 45 108 113\n", + "F 期中 4 132 111\n", + " 期末 41 21 108" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 行和列的多层索引,进行转换\n", + "# Stack the prescribed level(s) from columns to index.\n", + "# 从列变成行\n", + "df2 = df.stack(level = 1)\n", + "df2" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EnMathPython
ABCDEFABCDEFABCDEF
期中1533679104415241234610132131622411266111
期末73101117424541175710512210821101347646113108
\n", + "
" + ], + "text/plain": [ + " En Math Python \n", + " A B C D E F A B C D E F A B C D E F\n", + "期中 1 53 36 79 104 4 15 24 123 46 10 132 131 62 24 112 66 111\n", + "期末 73 101 117 42 45 41 17 57 105 122 108 21 101 34 76 46 113 108" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 从行变成列\n", + "df2.unstack(level= 0 )" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EnMathPython
期中期末期中期末期中期末
A1731517131101
B5310124576234
C361171231052476
D79424612211246
E104451010866113
F44113221111108
\n", + "
" + ], + "text/plain": [ + " En Math Python \n", + " 期中 期末 期中 期末 期中 期末\n", + "A 1 73 15 17 131 101\n", + "B 53 101 24 57 62 34\n", + "C 36 117 123 105 24 76\n", + "D 79 42 46 122 112 46\n", + "E 104 45 10 108 66 113\n", + "F 4 41 132 21 111 108" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2.unstack(level = 1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git "a/Day76-90/code/6-pandas\346\225\260\346\215\256\351\233\206\346\210\220.ipynb" "b/Day76-90/code/6-pandas\346\225\260\346\215\256\351\233\206\346\210\220.ipynb" new file mode 100644 index 0000000..e128ee4 --- /dev/null +++ "b/Day76-90/code/6-pandas\346\225\260\346\215\256\351\233\206\346\210\220.ipynb" @@ -0,0 +1,1209 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "import pandas as pd\n", + "from pandas import Series,DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 数据分析数据挖掘\n", + "# 有数据情况下:\n", + "# 数据预处理\n", + "# 数据清洗(空数据,异常值)\n", + "# 数据集成(多个数据合并到一起,级联)数据可能存放在多个表中\n", + "# 数据转化\n", + "# 数据规约(属性减少(不重要的属性删除),数据减少去重操作)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 5, 12, 67, 29, 46, 103, 53, 53, 139, 87],\n", + " [126, 33, 55, 104, 45, 70, 96, 133, 116, 43],\n", + " [ 84, 45, 17, 42, 19, 11, 125, 43, 54, 39],\n", + " [ 97, 68, 99, 90, 28, 60, 135, 84, 111, 63],\n", + " [114, 56, 30, 81, 48, 73, 119, 65, 20, 22]])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "array([[115, 128, 122, 127, 4, 135, 26, 25, 131, 139],\n", + " [ 66, 119, 37, 136, 101, 40, 102, 127, 148, 127],\n", + " [ 89, 80, 140, 133, 51, 142, 47, 27, 54, 23],\n", + " [ 64, 127, 33, 128, 60, 106, 67, 94, 110, 76],\n", + " [ 6, 21, 23, 96, 10, 62, 26, 79, 149, 43],\n", + " [116, 143, 132, 118, 68, 21, 57, 133, 124, 124]])" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# 首先看numpy数组的集成\n", + "nd1 = np.random.randint(0,150,size = (5,10))\n", + "\n", + "nd2 = np.random.randint(0,150,size = (6,10))\n", + "display(nd1,nd2)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 5, 12, 67, 29, 46, 103, 53, 53, 139, 87],\n", + " [126, 33, 55, 104, 45, 70, 96, 133, 116, 43],\n", + " [ 84, 45, 17, 42, 19, 11, 125, 43, 54, 39],\n", + " [ 97, 68, 99, 90, 28, 60, 135, 84, 111, 63],\n", + " [114, 56, 30, 81, 48, 73, 119, 65, 20, 22],\n", + " [115, 128, 122, 127, 4, 135, 26, 25, 131, 139],\n", + " [ 66, 119, 37, 136, 101, 40, 102, 127, 148, 127],\n", + " [ 89, 80, 140, 133, 51, 142, 47, 27, 54, 23],\n", + " [ 64, 127, 33, 128, 60, 106, 67, 94, 110, 76],\n", + " [ 6, 21, 23, 96, 10, 62, 26, 79, 149, 43],\n", + " [116, 143, 132, 118, 68, 21, 57, 133, 124, 124]])" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 原来数据一个5行,一个是6行,级联之后变成了11行\n", + "nd3 = np.concatenate([nd1,nd2],axis = 0)\n", + "nd3" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[110, 38, 144, 92, 38, 2, 67, 2, 103, 81],\n", + " [ 56, 61, 61, 22, 108, 145, 95, 44, 40, 100],\n", + " [ 65, 74, 85, 123, 47, 117, 35, 55, 120, 20],\n", + " [ 15, 9, 4, 84, 71, 133, 140, 13, 71, 91],\n", + " [ 94, 31, 41, 5, 7, 32, 50, 24, 18, 120]])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "array([[ 65, 149, 86, 138, 98],\n", + " [136, 49, 102, 45, 140],\n", + " [ 13, 124, 94, 81, 73],\n", + " [ 82, 38, 0, 75, 94],\n", + " [146, 28, 143, 61, 49]])" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "nd1 = np.random.randint(0,150,size = (5,10))\n", + "\n", + "nd2 = np.random.randint(0,150,size = (5,5))\n", + "display(nd1,nd2)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[110, 38, 144, 92, 38, 2, 67, 2, 103, 81, 65, 149, 86,\n", + " 138, 98],\n", + " [ 56, 61, 61, 22, 108, 145, 95, 44, 40, 100, 136, 49, 102,\n", + " 45, 140],\n", + " [ 65, 74, 85, 123, 47, 117, 35, 55, 120, 20, 13, 124, 94,\n", + " 81, 73],\n", + " [ 15, 9, 4, 84, 71, 133, 140, 13, 71, 91, 82, 38, 0,\n", + " 75, 94],\n", + " [ 94, 31, 41, 5, 7, 32, 50, 24, 18, 120, 146, 28, 143,\n", + " 61, 49]])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# axis = 0行级联(第一维度的级联),axis = 1(第二个维度的级联,列的级联)\n", + "np.concatenate((nd1,nd2),axis = 1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# pandas级联操作,pandas基于numpy\n", + "# pandas的级联类似" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonMathEn
A1135380
B1354052
C1441864
\n", + "
" + ], + "text/plain": [ + " Python Math En\n", + "A 113 53 80\n", + "B 135 40 52\n", + "C 144 18 64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonMathEn
D126118146
E1478127
F87631
G359533
H13011791
I12498122
\n", + "
" + ], + "text/plain": [ + " Python Math En\n", + "D 126 118 146\n", + "E 147 81 27\n", + "F 87 63 1\n", + "G 35 95 33\n", + "H 130 117 91\n", + "I 124 98 122" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df1 = DataFrame(np.random.randint(0,150,size = (3,3)),index = list('ABC'),columns=['Python','Math','En'])\n", + "\n", + "df2 = DataFrame(np.random.randint(0,150,size = (6,3)),index = list('DEFGHI'),columns=['Python','Math','En'])\n", + "\n", + "display(df1,df2)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonMathEn
A1135380
B1354052
C1441864
D126118146
E1478127
F87631
G359533
H13011791
I12498122
\n", + "
" + ], + "text/plain": [ + " Python Math En\n", + "A 113 53 80\n", + "B 135 40 52\n", + "C 144 18 64\n", + "D 126 118 146\n", + "E 147 81 27\n", + "F 87 63 1\n", + "G 35 95 33\n", + "H 130 117 91\n", + "I 124 98 122" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# pandas汇总数据,数据集成\n", + "df1.append(df2)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonMathEn
A1135380
B1354052
C1441864
D126118146
E1478127
F87631
G359533
H13011791
I12498122
\n", + "
" + ], + "text/plain": [ + " Python Math En\n", + "A 113 53 80\n", + "B 135 40 52\n", + "C 144 18 64\n", + "D 126 118 146\n", + "E 147 81 27\n", + "F 87 63 1\n", + "G 35 95 33\n", + "H 130 117 91\n", + "I 124 98 122" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([df1,df2])" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "d:\\python36\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n", + "of pandas will change to not sort by default.\n", + "\n", + "To accept the future behavior, pass 'sort=False'.\n", + "\n", + "To retain the current behavior and silence the warning, pass 'sort=True'.\n", + "\n", + " \"\"\"Entry point for launching an IPython kernel.\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonMathEnPythonMathEn
A113.053.080.0NaNNaNNaN
B135.040.052.0NaNNaNNaN
C144.018.064.0NaNNaNNaN
DNaNNaNNaN126.0118.0146.0
ENaNNaNNaN147.081.027.0
FNaNNaNNaN87.063.01.0
GNaNNaNNaN35.095.033.0
HNaNNaNNaN130.0117.091.0
INaNNaNNaN124.098.0122.0
\n", + "
" + ], + "text/plain": [ + " Python Math En Python Math En\n", + "A 113.0 53.0 80.0 NaN NaN NaN\n", + "B 135.0 40.0 52.0 NaN NaN NaN\n", + "C 144.0 18.0 64.0 NaN NaN NaN\n", + "D NaN NaN NaN 126.0 118.0 146.0\n", + "E NaN NaN NaN 147.0 81.0 27.0\n", + "F NaN NaN NaN 87.0 63.0 1.0\n", + "G NaN NaN NaN 35.0 95.0 33.0\n", + "H NaN NaN NaN 130.0 117.0 91.0\n", + "I NaN NaN NaN 124.0 98.0 122.0" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([df1,df2],axis = 1,ignore_index = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonMathEn
A225813
B995735
C512824
E560111
F13723121
G4978115
\n", + "
" + ], + "text/plain": [ + " Python Math En\n", + "A 22 58 13\n", + "B 99 57 35\n", + "C 51 28 24\n", + "E 5 60 111\n", + "F 137 23 121\n", + "G 49 78 115" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonMathEn
A11811381
B5122126
C0115128
E10013094
F4993140
G705994
\n", + "
" + ], + "text/plain": [ + " Python Math En\n", + "A 118 113 81\n", + "B 51 22 126\n", + "C 0 115 128\n", + "E 100 130 94\n", + "F 49 93 140\n", + "G 70 59 94" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# 期中\n", + "df1 = DataFrame(np.random.randint(0,150,size = (6,3)),index = list('ABCEFG'),columns=['Python','Math','En'])\n", + "\n", + "# 期末\n", + "df2 = DataFrame(np.random.randint(0,150,size = (6,3)),index = list('ABCEFG'),columns=['Python','Math','En'])\n", + "\n", + "display(df1,df2)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonMathEn
期中A225813
B995735
C512824
E560111
F13723121
G4978115
期末A11811381
B5122126
C0115128
E10013094
F4993140
G705994
\n", + "
" + ], + "text/plain": [ + " Python Math En\n", + "期中 A 22 58 13\n", + " B 99 57 35\n", + " C 51 28 24\n", + " E 5 60 111\n", + " F 137 23 121\n", + " G 49 78 115\n", + "期末 A 118 113 81\n", + " B 51 22 126\n", + " C 0 115 128\n", + " E 100 130 94\n", + " F 49 93 140\n", + " G 70 59 94" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df3 = pd.concat([df1,df2],axis = 0,keys = ['期中','期末'])\n", + "df3" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonMathEn
A期中225813
期末11811381
B期中995735
期末5122126
C期中512824
期末0115128
E期中560111
期末10013094
F期中13723121
期末4993140
G期中4978115
期末705994
\n", + "
" + ], + "text/plain": [ + " Python Math En\n", + "A 期中 22 58 13\n", + " 期末 118 113 81\n", + "B 期中 99 57 35\n", + " 期末 51 22 126\n", + "C 期中 51 28 24\n", + " 期末 0 115 128\n", + "E 期中 5 60 111\n", + " 期末 100 130 94\n", + "F 期中 137 23 121\n", + " 期末 49 93 140\n", + "G 期中 49 78 115\n", + " 期末 70 59 94" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df3.unstack(level = 0).stack()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git "a/Day76-90/code/7-pandas\346\225\260\346\215\256\351\233\206\346\210\220merge.ipynb" "b/Day76-90/code/7-pandas\346\225\260\346\215\256\351\233\206\346\210\220merge.ipynb" new file mode 100644 index 0000000..8b2eefc --- /dev/null +++ "b/Day76-90/code/7-pandas\346\225\260\346\215\256\351\233\206\346\210\220merge.ipynb" @@ -0,0 +1,1272 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "import pandas as pd\n", + "from pandas import Series,DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 上一讲,append,concat数据集成方法\n", + "# merge融合,根据某一共同属性进行级联,高级用法" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namesexid
0A1
1B2
2C3
3D4
4E5
5F6
\n", + "
" + ], + "text/plain": [ + " name sex id\n", + "0 A 男 1\n", + "1 B 女 2\n", + "2 C 女 3\n", + "3 D 女 4\n", + "4 E 男 5\n", + "5 F 男 6" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1 = DataFrame({'name':['A','B','C','D','E','F'],\n", + " 'sex':['男','女','女','女','男','男'],\n", + " 'id':[1,2,3,4,5,6]})\n", + "df1" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agesalaryid
022120001
125150002
227200003
321300004
418100005
52980007
\n", + "
" + ], + "text/plain": [ + " age salary id\n", + "0 22 12000 1\n", + "1 25 15000 2\n", + "2 27 20000 3\n", + "3 21 30000 4\n", + "4 18 10000 5\n", + "5 29 8000 7" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2 = DataFrame({'age':[22,25,27,21,18,29],'salary':[12000,15000,20000,30000,10000,8000],'id':[1,2,3,4,5,7]})\n", + "df2" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "d:\\python36\\lib\\site-packages\\pandas\\core\\frame.py:6692: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n", + "of pandas will change to not sort by default.\n", + "\n", + "To accept the future behavior, pass 'sort=False'.\n", + "\n", + "To retain the current behavior and silence the warning, pass 'sort=True'.\n", + "\n", + " sort=sort)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ageidnamesalarysex
0NaN1ANaN
1NaN2BNaN
2NaN3CNaN
3NaN4DNaN
4NaN5ENaN
5NaN6FNaN
022.01NaN12000.0NaN
125.02NaN15000.0NaN
227.03NaN20000.0NaN
321.04NaN30000.0NaN
418.05NaN10000.0NaN
529.07NaN8000.0NaN
\n", + "
" + ], + "text/plain": [ + " age id name salary sex\n", + "0 NaN 1 A NaN 男\n", + "1 NaN 2 B NaN 女\n", + "2 NaN 3 C NaN 女\n", + "3 NaN 4 D NaN 女\n", + "4 NaN 5 E NaN 男\n", + "5 NaN 6 F NaN 男\n", + "0 22.0 1 NaN 12000.0 NaN\n", + "1 25.0 2 NaN 15000.0 NaN\n", + "2 27.0 3 NaN 20000.0 NaN\n", + "3 21.0 4 NaN 30000.0 NaN\n", + "4 18.0 5 NaN 10000.0 NaN\n", + "5 29.0 7 NaN 8000.0 NaN" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1.append(df2)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namesexidagesalaryid
0A122120001
1B225150002
2C327200003
3D421300004
4E518100005
5F62980007
\n", + "
" + ], + "text/plain": [ + " name sex id age salary id\n", + "0 A 男 1 22 12000 1\n", + "1 B 女 2 25 15000 2\n", + "2 C 女 3 27 20000 3\n", + "3 D 女 4 21 30000 4\n", + "4 E 男 5 18 10000 5\n", + "5 F 男 6 29 8000 7" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([df1,df2],axis = 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namesexidagesalary
0A12212000
1B22515000
2C32720000
3D42130000
4E51810000
\n", + "
" + ], + "text/plain": [ + " name sex id age salary\n", + "0 A 男 1 22 12000\n", + "1 B 女 2 25 15000\n", + "2 C 女 3 27 20000\n", + "3 D 女 4 21 30000\n", + "4 E 男 5 18 10000" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1.merge(df2)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namesexidagesalary
0A122.012000.0
1B225.015000.0
2C327.020000.0
3D421.030000.0
4E518.010000.0
5F6NaNNaN
6NaNNaN729.08000.0
\n", + "
" + ], + "text/plain": [ + " name sex id age salary\n", + "0 A 男 1 22.0 12000.0\n", + "1 B 女 2 25.0 15000.0\n", + "2 C 女 3 27.0 20000.0\n", + "3 D 女 4 21.0 30000.0\n", + "4 E 男 5 18.0 10000.0\n", + "5 F 男 6 NaN NaN\n", + "6 NaN NaN 7 29.0 8000.0" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1.merge(df2,how = 'outer')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonMathEn
A401590
B595283
C14138137
D897853
E811013
F757986
\n", + "
" + ], + "text/plain": [ + " Python Math En\n", + "A 40 15 90\n", + "B 59 52 83\n", + "C 14 138 137\n", + "D 89 78 53\n", + "E 81 101 3\n", + "F 75 79 86" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = DataFrame(np.random.randint(0,150,size = (6,3)),index = list('ABCDEF'),columns=['Python','Math','En'])\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Python 59.7\n", + "Math 77.2\n", + "En 75.3\n", + "dtype: float64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s = df.mean().round(1)\n", + "s" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
score_mean
Python59.7
Math77.2
En75.3
\n", + "
" + ], + "text/plain": [ + " score_mean\n", + "Python 59.7\n", + "Math 77.2\n", + "En 75.3" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2 = DataFrame(s)\n", + "df2.columns = ['score_mean']\n", + "df2" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonMathEn
score_mean59.777.275.3
\n", + "
" + ], + "text/plain": [ + " Python Math En\n", + "score_mean 59.7 77.2 75.3" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df3 = df2.T\n", + "df3" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonMathEn
A40.015.090.0
B59.052.083.0
C14.0138.0137.0
D89.078.053.0
E81.0101.03.0
F75.079.086.0
score_mean59.777.275.3
\n", + "
" + ], + "text/plain": [ + " Python Math En\n", + "A 40.0 15.0 90.0\n", + "B 59.0 52.0 83.0\n", + "C 14.0 138.0 137.0\n", + "D 89.0 78.0 53.0\n", + "E 81.0 101.0 3.0\n", + "F 75.0 79.0 86.0\n", + "score_mean 59.7 77.2 75.3" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df4 = df.append(df3)\n", + "df4" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
score_mean
A48.3
B64.7
C96.3
D73.3
E61.7
F80.0
score_mean70.7
\n", + "
" + ], + "text/plain": [ + " score_mean\n", + "A 48.3\n", + "B 64.7\n", + "C 96.3\n", + "D 73.3\n", + "E 61.7\n", + "F 80.0\n", + "score_mean 70.7" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df5 = DataFrame(df4.mean(axis = 1).round(1))\n", + "df5.columns = ['score_mean']\n", + "df5" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PythonMathEnscore_mean
A40.015.090.048.3
B59.052.083.064.7
C14.0138.0137.096.3
D89.078.053.073.3
E81.0101.03.061.7
F75.079.086.080.0
score_mean59.777.275.370.7
\n", + "
" + ], + "text/plain": [ + " Python Math En score_mean\n", + "A 40.0 15.0 90.0 48.3\n", + "B 59.0 52.0 83.0 64.7\n", + "C 14.0 138.0 137.0 96.3\n", + "D 89.0 78.0 53.0 73.3\n", + "E 81.0 101.0 3.0 61.7\n", + "F 75.0 79.0 86.0 80.0\n", + "score_mean 59.7 77.2 75.3 70.7" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df4.merge(df5,left_index=True,right_index=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git "a/Day76-90/code/8-pandas\345\210\206\347\273\204\350\201\232\345\220\210\346\223\215\344\275\234.ipynb" "b/Day76-90/code/8-pandas\345\210\206\347\273\204\350\201\232\345\220\210\346\223\215\344\275\234.ipynb" new file mode 100644 index 0000000..7c16aff --- /dev/null +++ "b/Day76-90/code/8-pandas\345\210\206\347\273\204\350\201\232\345\220\210\346\223\215\344\275\234.ipynb" @@ -0,0 +1,877 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# 分组聚合透视\n", + "# 很多时候属性是相似的\n", + "\n", + "import numpy as np\n", + "\n", + "import pandas as pd\n", + "\n", + "from pandas import Series,DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HandSmokesexweightIQ
0rightyesmale80100
1leftyesfemale50120
2leftnofemale4890
3rightnomale75130
4rightyesmale68140
5rightnomale10080
6rightnofemale4094
7rightnofemale90110
8leftnomale88100
9rightyesfemale76160
\n", + "
" + ], + "text/plain": [ + " Hand Smoke sex weight IQ\n", + "0 right yes male 80 100\n", + "1 left yes female 50 120\n", + "2 left no female 48 90\n", + "3 right no male 75 130\n", + "4 right yes male 68 140\n", + "5 right no male 100 80\n", + "6 right no female 40 94\n", + "7 right no female 90 110\n", + "8 left no male 88 100\n", + "9 right yes female 76 160" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 走右手习惯,是否抽烟,性别,对体重,智商,有一定影响\n", + "\n", + "df = DataFrame({'Hand':['right','left','left','right','right','right','right','right','left','right'],\n", + " 'Smoke':['yes','yes','no','no','yes','no','no','no','no','yes'],\n", + " 'sex':['male','female','female','male','male','male','female','female','male','female'],\n", + " 'weight':[80,50,48,75,68,100,40,90,88,76],\n", + " 'IQ':[100,120,90,130,140,80,94,110,100,160]})\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 分组聚合查看规律,某一条件下规律" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weightIQ
Hand
left62.0103.3
right75.6116.3
\n", + "
" + ], + "text/plain": [ + " weight IQ\n", + "Hand \n", + "left 62.0 103.3\n", + "right 75.6 116.3" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = df.groupby(by = ['Hand'])[['weight','IQ']].mean().round(1)\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weight
Hand
left62.0
right75.6
\n", + "
" + ], + "text/plain": [ + " weight\n", + "Hand \n", + "left 62.0\n", + "right 75.6" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby(by = ['Hand'])[['weight']].apply(np.mean).round(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "df2 = df.groupby(by = ['Hand'])[['weight']].transform(np.mean).round(1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weight_mean
075.6
162.0
262.0
375.6
475.6
575.6
675.6
775.6
862.0
975.6
\n", + "
" + ], + "text/plain": [ + " weight_mean\n", + "0 75.6\n", + "1 62.0\n", + "2 62.0\n", + "3 75.6\n", + "4 75.6\n", + "5 75.6\n", + "6 75.6\n", + "7 75.6\n", + "8 62.0\n", + "9 75.6" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2 = df2.add_suffix('_mean')\n", + "df2" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HandSmokesexweightIQweight_mean
0rightyesmale8010075.6
1leftyesfemale5012062.0
2leftnofemale489062.0
3rightnomale7513075.6
4rightyesmale6814075.6
5rightnomale1008075.6
6rightnofemale409475.6
7rightnofemale9011075.6
8leftnomale8810062.0
9rightyesfemale7616075.6
\n", + "
" + ], + "text/plain": [ + " Hand Smoke sex weight IQ weight_mean\n", + "0 right yes male 80 100 75.6\n", + "1 left yes female 50 120 62.0\n", + "2 left no female 48 90 62.0\n", + "3 right no male 75 130 75.6\n", + "4 right yes male 68 140 75.6\n", + "5 right no male 100 80 75.6\n", + "6 right no female 40 94 75.6\n", + "7 right no female 90 110 75.6\n", + "8 left no male 88 100 62.0\n", + "9 right yes female 76 160 75.6" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df3 = df.merge(df2,left_index=True,right_index=True)\n", + "df3" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Hand\n", + "left ([3, 3], [62.0, 103.3])\n", + "right ([7, 7], [75.6, 116.3])\n", + "dtype: object" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def count(x):\n", + " \n", + " return (x.count(),x.mean().round(1))\n", + "\n", + "df.groupby(by = ['Hand'])[['weight','IQ']].apply(count)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IQ
Handsex
leftfemale120
male100
rightfemale160
male140
\n", + "
" + ], + "text/plain": [ + " IQ\n", + "Hand sex \n", + "left female 120\n", + " male 100\n", + "right female 160\n", + " male 140" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby(by = ['Hand','sex'])[['IQ']].max()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = df.groupby(by = ['Hand'])['IQ','weight']\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IQweight
maxmeanmaxmean
Hand
left120103.38862.0
right160116.310075.6
\n", + "
" + ], + "text/plain": [ + " IQ weight \n", + " max mean max mean\n", + "Hand \n", + "left 120 103.3 88 62.0\n", + "right 160 116.3 100 75.6" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.agg(['max','mean']).round(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IQweight
Hand
left12062.0
right16075.6
\n", + "
" + ], + "text/plain": [ + " IQ weight\n", + "Hand \n", + "left 120 62.0\n", + "right 160 75.6" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.agg({'IQ':'max','weight':'mean'}).round(1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git "a/Day76-90/code/9-pandas\346\225\260\346\215\256\351\233\206\346\210\220\345\256\236\346\210\230.ipynb" "b/Day76-90/code/9-pandas\346\225\260\346\215\256\351\233\206\346\210\220\345\256\236\346\210\230.ipynb" new file mode 100644 index 0000000..063a939 --- /dev/null +++ "b/Day76-90/code/9-pandas\346\225\260\346\215\256\351\233\206\346\210\220\345\256\236\346\210\230.ipynb" @@ -0,0 +1,6213 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "import pandas as pd\n", + "\n", + "from pandas import Series,DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# csv类型文件呢,文本文件,excel打开,格式化的文件,所以excel可以直接读取成表格\n", + "# 美国人口的一些情况\n", + "# pandas分析一下美国人口数据" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
statearea (sq. mi)
0Alabama52423
1Alaska656425
2Arizona114006
3Arkansas53182
4California163707
5Colorado104100
6Connecticut5544
7Delaware1954
8Florida65758
9Georgia59441
10Hawaii10932
11Idaho83574
12Illinois57918
13Indiana36420
14Iowa56276
15Kansas82282
16Kentucky40411
17Louisiana51843
18Maine35387
19Maryland12407
20Massachusetts10555
21Michigan96810
22Minnesota86943
23Mississippi48434
24Missouri69709
25Montana147046
26Nebraska77358
27Nevada110567
28New Hampshire9351
29New Jersey8722
30New Mexico121593
31New York54475
32North Carolina53821
33North Dakota70704
34Ohio44828
35Oklahoma69903
36Oregon98386
37Pennsylvania46058
38Rhode Island1545
39South Carolina32007
40South Dakota77121
41Tennessee42146
42Texas268601
43Utah84904
44Vermont9615
45Virginia42769
46Washington71303
47West Virginia24231
48Wisconsin65503
49Wyoming97818
50District of Columbia68
51Puerto Rico3515
\n", + "
" + ], + "text/plain": [ + " state area (sq. mi)\n", + "0 Alabama 52423\n", + "1 Alaska 656425\n", + "2 Arizona 114006\n", + "3 Arkansas 53182\n", + "4 California 163707\n", + "5 Colorado 104100\n", + "6 Connecticut 5544\n", + "7 Delaware 1954\n", + "8 Florida 65758\n", + "9 Georgia 59441\n", + "10 Hawaii 10932\n", + "11 Idaho 83574\n", + "12 Illinois 57918\n", + "13 Indiana 36420\n", + "14 Iowa 56276\n", + "15 Kansas 82282\n", + "16 Kentucky 40411\n", + "17 Louisiana 51843\n", + "18 Maine 35387\n", + "19 Maryland 12407\n", + "20 Massachusetts 10555\n", + "21 Michigan 96810\n", + "22 Minnesota 86943\n", + "23 Mississippi 48434\n", + "24 Missouri 69709\n", + "25 Montana 147046\n", + "26 Nebraska 77358\n", + "27 Nevada 110567\n", + "28 New Hampshire 9351\n", + "29 New Jersey 8722\n", + "30 New Mexico 121593\n", + "31 New York 54475\n", + "32 North Carolina 53821\n", + "33 North Dakota 70704\n", + "34 Ohio 44828\n", + "35 Oklahoma 69903\n", + "36 Oregon 98386\n", + "37 Pennsylvania 46058\n", + "38 Rhode Island 1545\n", + "39 South Carolina 32007\n", + "40 South Dakota 77121\n", + "41 Tennessee 42146\n", + "42 Texas 268601\n", + "43 Utah 84904\n", + "44 Vermont 9615\n", + "45 Virginia 42769\n", + "46 Washington 71303\n", + "47 West Virginia 24231\n", + "48 Wisconsin 65503\n", + "49 Wyoming 97818\n", + "50 District of Columbia 68\n", + "51 Puerto Rico 3515" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 美国各州的面积\n", + "areas = pd.read_csv('./state-areas.csv')\n", + "areas" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(52, 2)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "areas.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stateabbreviation
0AlabamaAL
1AlaskaAK
2ArizonaAZ
3ArkansasAR
4CaliforniaCA
5ColoradoCO
6ConnecticutCT
7DelawareDE
8District of ColumbiaDC
9FloridaFL
10GeorgiaGA
11HawaiiHI
12IdahoID
13IllinoisIL
14IndianaIN
15IowaIA
16KansasKS
17KentuckyKY
18LouisianaLA
19MaineME
20MontanaMT
21NebraskaNE
22NevadaNV
23New HampshireNH
24New JerseyNJ
25New MexicoNM
26New YorkNY
27North CarolinaNC
28North DakotaND
29OhioOH
30OklahomaOK
31OregonOR
32MarylandMD
33MassachusettsMA
34MichiganMI
35MinnesotaMN
36MississippiMS
37MissouriMO
38PennsylvaniaPA
39Rhode IslandRI
40South CarolinaSC
41South DakotaSD
42TennesseeTN
43TexasTX
44UtahUT
45VermontVT
46VirginiaVA
47WashingtonWA
48West VirginiaWV
49WisconsinWI
50WyomingWY
\n", + "
" + ], + "text/plain": [ + " state abbreviation\n", + "0 Alabama AL\n", + "1 Alaska AK\n", + "2 Arizona AZ\n", + "3 Arkansas AR\n", + "4 California CA\n", + "5 Colorado CO\n", + "6 Connecticut CT\n", + "7 Delaware DE\n", + "8 District of Columbia DC\n", + "9 Florida FL\n", + "10 Georgia GA\n", + "11 Hawaii HI\n", + "12 Idaho ID\n", + "13 Illinois IL\n", + "14 Indiana IN\n", + "15 Iowa IA\n", + "16 Kansas KS\n", + "17 Kentucky KY\n", + "18 Louisiana LA\n", + "19 Maine ME\n", + "20 Montana MT\n", + "21 Nebraska NE\n", + "22 Nevada NV\n", + "23 New Hampshire NH\n", + "24 New Jersey NJ\n", + "25 New Mexico NM\n", + "26 New York NY\n", + "27 North Carolina NC\n", + "28 North Dakota ND\n", + "29 Ohio OH\n", + "30 Oklahoma OK\n", + "31 Oregon OR\n", + "32 Maryland MD\n", + "33 Massachusetts MA\n", + "34 Michigan MI\n", + "35 Minnesota MN\n", + "36 Mississippi MS\n", + "37 Missouri MO\n", + "38 Pennsylvania PA\n", + "39 Rhode Island RI\n", + "40 South Carolina SC\n", + "41 South Dakota SD\n", + "42 Tennessee TN\n", + "43 Texas TX\n", + "44 Utah UT\n", + "45 Vermont VT\n", + "46 Virginia VA\n", + "47 Washington WA\n", + "48 West Virginia WV\n", + "49 Wisconsin WI\n", + "50 Wyoming WY" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 美国各州 缩写\n", + "abbrevs = pd.read_csv('./state-abbrevs.csv')\n", + "abbrevs" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(51, 2)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "abbrevs.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
state/regionagesyearpopulation
0ALunder1820121117489.0
1ALtotal20124817528.0
2ALunder1820101130966.0
3ALtotal20104785570.0
4ALunder1820111125763.0
5ALtotal20114801627.0
6ALtotal20094757938.0
7ALunder1820091134192.0
8ALunder1820131111481.0
9ALtotal20134833722.0
10ALtotal20074672840.0
11ALunder1820071132296.0
12ALtotal20084718206.0
13ALunder1820081134927.0
14ALtotal20054569805.0
15ALunder1820051117229.0
16ALtotal20064628981.0
17ALunder1820061126798.0
18ALtotal20044530729.0
19ALunder1820041113662.0
20ALtotal20034503491.0
21ALunder1820031113083.0
22ALtotal20014467634.0
23ALunder1820011120409.0
24ALtotal20024480089.0
25ALunder1820021116590.0
26ALunder1819991121287.0
27ALtotal19994430141.0
28ALtotal20004452173.0
29ALunder1820001122273.0
...............
2514USAunder18199971946051.0
2515USAtotal2000282162411.0
2516USAunder18200072376189.0
2517USAtotal1999279040181.0
2518USAtotal2001284968955.0
2519USAunder18200172671175.0
2520USAtotal2002287625193.0
2521USAunder18200272936457.0
2522USAtotal2003290107933.0
2523USAunder18200373100758.0
2524USAtotal2004292805298.0
2525USAunder18200473297735.0
2526USAtotal2005295516599.0
2527USAunder18200573523669.0
2528USAtotal2006298379912.0
2529USAunder18200673757714.0
2530USAtotal2007301231207.0
2531USAunder18200774019405.0
2532USAtotal2008304093966.0
2533USAunder18200874104602.0
2534USAunder18201373585872.0
2535USAtotal2013316128839.0
2536USAtotal2009306771529.0
2537USAunder18200974134167.0
2538USAunder18201074119556.0
2539USAtotal2010309326295.0
2540USAunder18201173902222.0
2541USAtotal2011311582564.0
2542USAunder18201273708179.0
2543USAtotal2012313873685.0
\n", + "

2544 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " state/region ages year population\n", + "0 AL under18 2012 1117489.0\n", + "1 AL total 2012 4817528.0\n", + "2 AL under18 2010 1130966.0\n", + "3 AL total 2010 4785570.0\n", + "4 AL under18 2011 1125763.0\n", + "5 AL total 2011 4801627.0\n", + "6 AL total 2009 4757938.0\n", + "7 AL under18 2009 1134192.0\n", + "8 AL under18 2013 1111481.0\n", + "9 AL total 2013 4833722.0\n", + "10 AL total 2007 4672840.0\n", + "11 AL under18 2007 1132296.0\n", + "12 AL total 2008 4718206.0\n", + "13 AL under18 2008 1134927.0\n", + "14 AL total 2005 4569805.0\n", + "15 AL under18 2005 1117229.0\n", + "16 AL total 2006 4628981.0\n", + "17 AL under18 2006 1126798.0\n", + "18 AL total 2004 4530729.0\n", + "19 AL under18 2004 1113662.0\n", + "20 AL total 2003 4503491.0\n", + "21 AL under18 2003 1113083.0\n", + "22 AL total 2001 4467634.0\n", + "23 AL under18 2001 1120409.0\n", + "24 AL total 2002 4480089.0\n", + "25 AL under18 2002 1116590.0\n", + "26 AL under18 1999 1121287.0\n", + "27 AL total 1999 4430141.0\n", + "28 AL total 2000 4452173.0\n", + "29 AL under18 2000 1122273.0\n", + "... ... ... ... ...\n", + "2514 USA under18 1999 71946051.0\n", + "2515 USA total 2000 282162411.0\n", + "2516 USA under18 2000 72376189.0\n", + "2517 USA total 1999 279040181.0\n", + "2518 USA total 2001 284968955.0\n", + "2519 USA under18 2001 72671175.0\n", + "2520 USA total 2002 287625193.0\n", + "2521 USA under18 2002 72936457.0\n", + "2522 USA total 2003 290107933.0\n", + "2523 USA under18 2003 73100758.0\n", + "2524 USA total 2004 292805298.0\n", + "2525 USA under18 2004 73297735.0\n", + "2526 USA total 2005 295516599.0\n", + "2527 USA under18 2005 73523669.0\n", + "2528 USA total 2006 298379912.0\n", + "2529 USA under18 2006 73757714.0\n", + "2530 USA total 2007 301231207.0\n", + "2531 USA under18 2007 74019405.0\n", + "2532 USA total 2008 304093966.0\n", + "2533 USA under18 2008 74104602.0\n", + "2534 USA under18 2013 73585872.0\n", + "2535 USA total 2013 316128839.0\n", + "2536 USA total 2009 306771529.0\n", + "2537 USA under18 2009 74134167.0\n", + "2538 USA under18 2010 74119556.0\n", + "2539 USA total 2010 309326295.0\n", + "2540 USA under18 2011 73902222.0\n", + "2541 USA total 2011 311582564.0\n", + "2542 USA under18 2012 73708179.0\n", + "2543 USA total 2012 313873685.0\n", + "\n", + "[2544 rows x 4 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 美国的人口数据\n", + "pop = pd.read_csv('./state-population.csv')\n", + "pop" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2544, 4)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
state/regionagesyearpopulation
0ALunder1820121117489.0
1ALtotal20124817528.0
2ALunder1820101130966.0
3ALtotal20104785570.0
4ALunder1820111125763.0
\n", + "
" + ], + "text/plain": [ + " state/region ages year population\n", + "0 AL under18 2012 1117489.0\n", + "1 AL total 2012 4817528.0\n", + "2 AL under18 2010 1130966.0\n", + "3 AL total 2010 4785570.0\n", + "4 AL under18 2011 1125763.0" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stateabbreviation
0AlabamaAL
1AlaskaAK
2ArizonaAZ
3ArkansasAR
4CaliforniaCA
\n", + "
" + ], + "text/plain": [ + " state abbreviation\n", + "0 Alabama AL\n", + "1 Alaska AK\n", + "2 Arizona AZ\n", + "3 Arkansas AR\n", + "4 California CA" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "abbrevs.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(2544, 4)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "(51, 2)" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(pop.shape,abbrevs.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(2544, 6)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 级联时,数据变少了96个,哪些数据变少\n", + "# inner内连接,outer叫做外连接\n", + "pop2 = pop.merge(abbrevs,how = 'outer',left_on='state/region',right_on='abbreviation')\n", + "pop2.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "state/region False\n", + "ages False\n", + "year False\n", + "population True\n", + "state True\n", + "abbreviation True\n", + "dtype: bool" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 前三列没有空值\n", + "pop2.isnull().any()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
state/regionagesyearpopulationstateabbreviation
0ALunder1820121117489.0AlabamaAL
1ALtotal20124817528.0AlabamaAL
2ALunder1820101130966.0AlabamaAL
3ALtotal20104785570.0AlabamaAL
4ALunder1820111125763.0AlabamaAL
\n", + "
" + ], + "text/plain": [ + " state/region ages year population state abbreviation\n", + "0 AL under18 2012 1117489.0 Alabama AL\n", + "1 AL total 2012 4817528.0 Alabama AL\n", + "2 AL under18 2010 1130966.0 Alabama AL\n", + "3 AL total 2010 4785570.0 Alabama AL\n", + "4 AL under18 2011 1125763.0 Alabama AL" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop2.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "# 删除一列\n", + "pop2.drop(labels = 'abbreviation',axis = 1,inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
state/regionagesyearpopulationstate
0ALunder1820121117489.0Alabama
1ALtotal20124817528.0Alabama
2ALunder1820101130966.0Alabama
3ALtotal20104785570.0Alabama
4ALunder1820111125763.0Alabama
\n", + "
" + ], + "text/plain": [ + " state/region ages year population state\n", + "0 AL under18 2012 1117489.0 Alabama\n", + "1 AL total 2012 4817528.0 Alabama\n", + "2 AL under18 2010 1130966.0 Alabama\n", + "3 AL total 2010 4785570.0 Alabama\n", + "4 AL under18 2011 1125763.0 Alabama" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop2.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "state/region False\n", + "ages False\n", + "year False\n", + "population True\n", + "state True\n", + "dtype: bool" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop2.isnull().any()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 False\n", + "1 False\n", + "2 False\n", + "3 False\n", + "4 False\n", + "5 False\n", + "6 False\n", + "7 False\n", + "8 False\n", + "9 False\n", + "10 False\n", + "11 False\n", + "12 False\n", + "13 False\n", + "14 False\n", + "15 False\n", + "16 False\n", + "17 False\n", + "18 False\n", + "19 False\n", + "20 False\n", + "21 False\n", + "22 False\n", + "23 False\n", + "24 False\n", + "25 False\n", + "26 False\n", + "27 False\n", + "28 False\n", + "29 False\n", + " ... \n", + "2514 True\n", + "2515 True\n", + "2516 True\n", + "2517 True\n", + "2518 True\n", + "2519 True\n", + "2520 True\n", + "2521 True\n", + "2522 True\n", + "2523 True\n", + "2524 True\n", + "2525 True\n", + "2526 True\n", + "2527 True\n", + "2528 True\n", + "2529 True\n", + "2530 True\n", + "2531 True\n", + "2532 True\n", + "2533 True\n", + "2534 True\n", + "2535 True\n", + "2536 True\n", + "2537 True\n", + "2538 True\n", + "2539 True\n", + "2540 True\n", + "2541 True\n", + "2542 True\n", + "2543 True\n", + "Name: state, Length: 2544, dtype: bool" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 定位为空的数据\n", + "cond = pop2['state'].isnull()\n", + "cond" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['PR', 'USA'], dtype=object)" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 只有当state为空,返回,为空时True\n", + "# 去重操作,非重复值\n", + "pop2[cond]['state/region'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(51, 2)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "abbrevs.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(52, 2)" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "areas.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
statearea (sq. mi)
0Alabama52423
1Alaska656425
2Arizona114006
3Arkansas53182
4California163707
5Colorado104100
6Connecticut5544
7Delaware1954
8Florida65758
9Georgia59441
10Hawaii10932
11Idaho83574
12Illinois57918
13Indiana36420
14Iowa56276
15Kansas82282
16Kentucky40411
17Louisiana51843
18Maine35387
19Maryland12407
20Massachusetts10555
21Michigan96810
22Minnesota86943
23Mississippi48434
24Missouri69709
25Montana147046
26Nebraska77358
27Nevada110567
28New Hampshire9351
29New Jersey8722
30New Mexico121593
31New York54475
32North Carolina53821
33North Dakota70704
34Ohio44828
35Oklahoma69903
36Oregon98386
37Pennsylvania46058
38Rhode Island1545
39South Carolina32007
40South Dakota77121
41Tennessee42146
42Texas268601
43Utah84904
44Vermont9615
45Virginia42769
46Washington71303
47West Virginia24231
48Wisconsin65503
49Wyoming97818
50District of Columbia68
51Puerto Rico3515
\n", + "
" + ], + "text/plain": [ + " state area (sq. mi)\n", + "0 Alabama 52423\n", + "1 Alaska 656425\n", + "2 Arizona 114006\n", + "3 Arkansas 53182\n", + "4 California 163707\n", + "5 Colorado 104100\n", + "6 Connecticut 5544\n", + "7 Delaware 1954\n", + "8 Florida 65758\n", + "9 Georgia 59441\n", + "10 Hawaii 10932\n", + "11 Idaho 83574\n", + "12 Illinois 57918\n", + "13 Indiana 36420\n", + "14 Iowa 56276\n", + "15 Kansas 82282\n", + "16 Kentucky 40411\n", + "17 Louisiana 51843\n", + "18 Maine 35387\n", + "19 Maryland 12407\n", + "20 Massachusetts 10555\n", + "21 Michigan 96810\n", + "22 Minnesota 86943\n", + "23 Mississippi 48434\n", + "24 Missouri 69709\n", + "25 Montana 147046\n", + "26 Nebraska 77358\n", + "27 Nevada 110567\n", + "28 New Hampshire 9351\n", + "29 New Jersey 8722\n", + "30 New Mexico 121593\n", + "31 New York 54475\n", + "32 North Carolina 53821\n", + "33 North Dakota 70704\n", + "34 Ohio 44828\n", + "35 Oklahoma 69903\n", + "36 Oregon 98386\n", + "37 Pennsylvania 46058\n", + "38 Rhode Island 1545\n", + "39 South Carolina 32007\n", + "40 South Dakota 77121\n", + "41 Tennessee 42146\n", + "42 Texas 268601\n", + "43 Utah 84904\n", + "44 Vermont 9615\n", + "45 Virginia 42769\n", + "46 Washington 71303\n", + "47 West Virginia 24231\n", + "48 Wisconsin 65503\n", + "49 Wyoming 97818\n", + "50 District of Columbia 68\n", + "51 Puerto Rico 3515" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "areas" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 False\n", + "1 False\n", + "2 False\n", + "3 False\n", + "4 False\n", + "5 False\n", + "6 False\n", + "7 False\n", + "8 False\n", + "9 False\n", + "10 False\n", + "11 False\n", + "12 False\n", + "13 False\n", + "14 False\n", + "15 False\n", + "16 False\n", + "17 False\n", + "18 False\n", + "19 False\n", + "20 False\n", + "21 False\n", + "22 False\n", + "23 False\n", + "24 False\n", + "25 False\n", + "26 False\n", + "27 False\n", + "28 False\n", + "29 False\n", + " ... \n", + "2514 False\n", + "2515 False\n", + "2516 False\n", + "2517 False\n", + "2518 False\n", + "2519 False\n", + "2520 False\n", + "2521 False\n", + "2522 False\n", + "2523 False\n", + "2524 False\n", + "2525 False\n", + "2526 False\n", + "2527 False\n", + "2528 False\n", + "2529 False\n", + "2530 False\n", + "2531 False\n", + "2532 False\n", + "2533 False\n", + "2534 False\n", + "2535 False\n", + "2536 False\n", + "2537 False\n", + "2538 False\n", + "2539 False\n", + "2540 False\n", + "2541 False\n", + "2542 False\n", + "2543 False\n", + "Name: state/region, Length: 2544, dtype: bool" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cond = pop2['state/region'] == 'PR'\n", + "cond" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "d:\\python36\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + " \"\"\"Entry point for launching an IPython kernel.\n" + ] + } + ], + "source": [ + "pop2['state'][cond] = 'Puerto Rico'" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "d:\\python36\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + " \n" + ] + } + ], + "source": [ + "cond = pop2['state/region'] == 'USA'\n", + "pop2['state'][cond] = 'United State'" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "state/region False\n", + "ages False\n", + "year False\n", + "population True\n", + "state False\n", + "dtype: bool" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop2.isnull().any()" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(20, 5)" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cond = pop2['population'].isnull()\n", + "pop2[cond].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2544, 5)" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop2.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "# 将难于进行补全的空数据进行删除\n", + "pop2.dropna(inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2524, 5)" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop2.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "state/region False\n", + "ages False\n", + "year False\n", + "population False\n", + "state False\n", + "dtype: bool" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop2.isnull().any()" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "state/region True\n", + "ages True\n", + "year True\n", + "population True\n", + "state True\n", + "dtype: bool" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop2.notnull().all()" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
statearea (sq. mi)
0Alabama52423
1Alaska656425
2Arizona114006
3Arkansas53182
4California163707
\n", + "
" + ], + "text/plain": [ + " state area (sq. mi)\n", + "0 Alabama 52423\n", + "1 Alaska 656425\n", + "2 Arizona 114006\n", + "3 Arkansas 53182\n", + "4 California 163707" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "areas.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
state/regionagesyearpopulationstate
0ALunder1820121117489.0Alabama
1ALtotal20124817528.0Alabama
2ALunder1820101130966.0Alabama
3ALtotal20104785570.0Alabama
4ALunder1820111125763.0Alabama
\n", + "
" + ], + "text/plain": [ + " state/region ages year population state\n", + "0 AL under18 2012 1117489.0 Alabama\n", + "1 AL total 2012 4817528.0 Alabama\n", + "2 AL under18 2010 1130966.0 Alabama\n", + "3 AL total 2010 4785570.0 Alabama\n", + "4 AL under18 2011 1125763.0 Alabama" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop2.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2524, 6)" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop3 = pop2.merge(areas,how = 'outer')\n", + "pop3.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
state/regionagesyearpopulationstatearea (sq. mi)
0ALunder1820121117489.0Alabama52423.0
1ALtotal20124817528.0Alabama52423.0
2ALunder1820101130966.0Alabama52423.0
3ALtotal20104785570.0Alabama52423.0
4ALunder1820111125763.0Alabama52423.0
\n", + "
" + ], + "text/plain": [ + " state/region ages year population state area (sq. mi)\n", + "0 AL under18 2012 1117489.0 Alabama 52423.0\n", + "1 AL total 2012 4817528.0 Alabama 52423.0\n", + "2 AL under18 2010 1130966.0 Alabama 52423.0\n", + "3 AL total 2010 4785570.0 Alabama 52423.0\n", + "4 AL under18 2011 1125763.0 Alabama 52423.0" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop3.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "state/region False\n", + "ages False\n", + "year False\n", + "population False\n", + "state False\n", + "area (sq. mi) True\n", + "dtype: bool" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop3.isnull().any()" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
state/regionagesyearpopulationstatearea (sq. mi)
2476USAunder18199064218512.0United StateNaN
2477USAtotal1990249622814.0United StateNaN
2478USAtotal1991252980942.0United StateNaN
2479USAunder18199165313018.0United StateNaN
2480USAunder18199266509177.0United StateNaN
2481USAtotal1992256514231.0United StateNaN
2482USAtotal1993259918595.0United StateNaN
2483USAunder18199367594938.0United StateNaN
2484USAunder18199468640936.0United StateNaN
2485USAtotal1994263125826.0United StateNaN
2486USAunder18199569473140.0United StateNaN
2487USAunder18199670233512.0United StateNaN
2488USAtotal1995266278403.0United StateNaN
2489USAtotal1996269394291.0United StateNaN
2490USAtotal1997272646932.0United StateNaN
2491USAunder18199770920738.0United StateNaN
2492USAunder18199871431406.0United StateNaN
2493USAtotal1998275854116.0United StateNaN
2494USAunder18199971946051.0United StateNaN
2495USAtotal2000282162411.0United StateNaN
2496USAunder18200072376189.0United StateNaN
2497USAtotal1999279040181.0United StateNaN
2498USAtotal2001284968955.0United StateNaN
2499USAunder18200172671175.0United StateNaN
2500USAtotal2002287625193.0United StateNaN
2501USAunder18200272936457.0United StateNaN
2502USAtotal2003290107933.0United StateNaN
2503USAunder18200373100758.0United StateNaN
2504USAtotal2004292805298.0United StateNaN
2505USAunder18200473297735.0United StateNaN
2506USAtotal2005295516599.0United StateNaN
2507USAunder18200573523669.0United StateNaN
2508USAtotal2006298379912.0United StateNaN
2509USAunder18200673757714.0United StateNaN
2510USAtotal2007301231207.0United StateNaN
2511USAunder18200774019405.0United StateNaN
2512USAtotal2008304093966.0United StateNaN
2513USAunder18200874104602.0United StateNaN
2514USAunder18201373585872.0United StateNaN
2515USAtotal2013316128839.0United StateNaN
2516USAtotal2009306771529.0United StateNaN
2517USAunder18200974134167.0United StateNaN
2518USAunder18201074119556.0United StateNaN
2519USAtotal2010309326295.0United StateNaN
2520USAunder18201173902222.0United StateNaN
2521USAtotal2011311582564.0United StateNaN
2522USAunder18201273708179.0United StateNaN
2523USAtotal2012313873685.0United StateNaN
\n", + "
" + ], + "text/plain": [ + " state/region ages year population state area (sq. mi)\n", + "2476 USA under18 1990 64218512.0 United State NaN\n", + "2477 USA total 1990 249622814.0 United State NaN\n", + "2478 USA total 1991 252980942.0 United State NaN\n", + "2479 USA under18 1991 65313018.0 United State NaN\n", + "2480 USA under18 1992 66509177.0 United State NaN\n", + "2481 USA total 1992 256514231.0 United State NaN\n", + "2482 USA total 1993 259918595.0 United State NaN\n", + "2483 USA under18 1993 67594938.0 United State NaN\n", + "2484 USA under18 1994 68640936.0 United State NaN\n", + "2485 USA total 1994 263125826.0 United State NaN\n", + "2486 USA under18 1995 69473140.0 United State NaN\n", + "2487 USA under18 1996 70233512.0 United State NaN\n", + "2488 USA total 1995 266278403.0 United State NaN\n", + "2489 USA total 1996 269394291.0 United State NaN\n", + "2490 USA total 1997 272646932.0 United State NaN\n", + "2491 USA under18 1997 70920738.0 United State NaN\n", + "2492 USA under18 1998 71431406.0 United State NaN\n", + "2493 USA total 1998 275854116.0 United State NaN\n", + "2494 USA under18 1999 71946051.0 United State NaN\n", + "2495 USA total 2000 282162411.0 United State NaN\n", + "2496 USA under18 2000 72376189.0 United State NaN\n", + "2497 USA total 1999 279040181.0 United State NaN\n", + "2498 USA total 2001 284968955.0 United State NaN\n", + "2499 USA under18 2001 72671175.0 United State NaN\n", + "2500 USA total 2002 287625193.0 United State NaN\n", + "2501 USA under18 2002 72936457.0 United State NaN\n", + "2502 USA total 2003 290107933.0 United State NaN\n", + "2503 USA under18 2003 73100758.0 United State NaN\n", + "2504 USA total 2004 292805298.0 United State NaN\n", + "2505 USA under18 2004 73297735.0 United State NaN\n", + "2506 USA total 2005 295516599.0 United State NaN\n", + "2507 USA under18 2005 73523669.0 United State NaN\n", + "2508 USA total 2006 298379912.0 United State NaN\n", + "2509 USA under18 2006 73757714.0 United State NaN\n", + "2510 USA total 2007 301231207.0 United State NaN\n", + "2511 USA under18 2007 74019405.0 United State NaN\n", + "2512 USA total 2008 304093966.0 United State NaN\n", + "2513 USA under18 2008 74104602.0 United State NaN\n", + "2514 USA under18 2013 73585872.0 United State NaN\n", + "2515 USA total 2013 316128839.0 United State NaN\n", + "2516 USA total 2009 306771529.0 United State NaN\n", + "2517 USA under18 2009 74134167.0 United State NaN\n", + "2518 USA under18 2010 74119556.0 United State NaN\n", + "2519 USA total 2010 309326295.0 United State NaN\n", + "2520 USA under18 2011 73902222.0 United State NaN\n", + "2521 USA total 2011 311582564.0 United State NaN\n", + "2522 USA under18 2012 73708179.0 United State NaN\n", + "2523 USA total 2012 313873685.0 United State NaN" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cond = pop3['area (sq. mi)'].isnull()\n", + "pop3[cond]" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3790399" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = areas['area (sq. mi)'].sum()\n", + "a" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "d:\\python36\\lib\\site-packages\\ipykernel_launcher.py:3: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + " This is separate from the ipykernel package so we can avoid doing imports until\n" + ] + } + ], + "source": [ + "cond = pop3['state'] == \"United State\"\n", + "\n", + "pop3['area (sq. mi)'][cond] = a" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "state/region True\n", + "ages True\n", + "year True\n", + "population True\n", + "state True\n", + "area (sq. mi) True\n", + "dtype: bool" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop3.notnull().all()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
state/regionagesyearpopulationstatearea (sq. mi)
0ALunder1820121117489.0Alabama52423.0
1ALtotal20124817528.0Alabama52423.0
2ALunder1820101130966.0Alabama52423.0
3ALtotal20104785570.0Alabama52423.0
4ALunder1820111125763.0Alabama52423.0
\n", + "
" + ], + "text/plain": [ + " state/region ages year population state area (sq. mi)\n", + "0 AL under18 2012 1117489.0 Alabama 52423.0\n", + "1 AL total 2012 4817528.0 Alabama 52423.0\n", + "2 AL under18 2010 1130966.0 Alabama 52423.0\n", + "3 AL total 2010 4785570.0 Alabama 52423.0\n", + "4 AL under18 2011 1125763.0 Alabama 52423.0" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop3.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 21.3\n", + "1 91.9\n", + "2 21.6\n", + "3 91.3\n", + "4 21.5\n", + "5 91.6\n", + "6 90.8\n", + "7 21.6\n", + "8 21.2\n", + "9 92.2\n", + "10 89.1\n", + "11 21.6\n", + "12 90.0\n", + "13 21.6\n", + "14 87.2\n", + "15 21.3\n", + "16 88.3\n", + "17 21.5\n", + "18 86.4\n", + "19 21.2\n", + "20 85.9\n", + "21 21.2\n", + "22 85.2\n", + "23 21.4\n", + "24 85.5\n", + "25 21.3\n", + "26 21.4\n", + "27 84.5\n", + "28 84.9\n", + "29 21.4\n", + " ... \n", + "2494 19.0\n", + "2495 74.4\n", + "2496 19.1\n", + "2497 73.6\n", + "2498 75.2\n", + "2499 19.2\n", + "2500 75.9\n", + "2501 19.2\n", + "2502 76.5\n", + "2503 19.3\n", + "2504 77.2\n", + "2505 19.3\n", + "2506 78.0\n", + "2507 19.4\n", + "2508 78.7\n", + "2509 19.5\n", + "2510 79.5\n", + "2511 19.5\n", + "2512 80.2\n", + "2513 19.6\n", + "2514 19.4\n", + "2515 83.4\n", + "2516 80.9\n", + "2517 19.6\n", + "2518 19.6\n", + "2519 81.6\n", + "2520 19.5\n", + "2521 82.2\n", + "2522 19.4\n", + "2523 82.8\n", + "Length: 2524, dtype: float64" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop_density = (pop3['population']/pop3['area (sq. mi)']).round(1)\n", + "pop_density" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
021.3
191.9
221.6
391.3
421.5
591.6
690.8
721.6
821.2
992.2
1089.1
1121.6
1290.0
1321.6
1487.2
1521.3
1688.3
1721.5
1886.4
1921.2
2085.9
2121.2
2285.2
2321.4
2485.5
2521.3
2621.4
2784.5
2884.9
2921.4
......
249419.0
249574.4
249619.1
249773.6
249875.2
249919.2
250075.9
250119.2
250276.5
250319.3
250477.2
250519.3
250678.0
250719.4
250878.7
250919.5
251079.5
251119.5
251280.2
251319.6
251419.4
251583.4
251680.9
251719.6
251819.6
251981.6
252019.5
252182.2
252219.4
252382.8
\n", + "

2524 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " 0\n", + "0 21.3\n", + "1 91.9\n", + "2 21.6\n", + "3 91.3\n", + "4 21.5\n", + "5 91.6\n", + "6 90.8\n", + "7 21.6\n", + "8 21.2\n", + "9 92.2\n", + "10 89.1\n", + "11 21.6\n", + "12 90.0\n", + "13 21.6\n", + "14 87.2\n", + "15 21.3\n", + "16 88.3\n", + "17 21.5\n", + "18 86.4\n", + "19 21.2\n", + "20 85.9\n", + "21 21.2\n", + "22 85.2\n", + "23 21.4\n", + "24 85.5\n", + "25 21.3\n", + "26 21.4\n", + "27 84.5\n", + "28 84.9\n", + "29 21.4\n", + "... ...\n", + "2494 19.0\n", + "2495 74.4\n", + "2496 19.1\n", + "2497 73.6\n", + "2498 75.2\n", + "2499 19.2\n", + "2500 75.9\n", + "2501 19.2\n", + "2502 76.5\n", + "2503 19.3\n", + "2504 77.2\n", + "2505 19.3\n", + "2506 78.0\n", + "2507 19.4\n", + "2508 78.7\n", + "2509 19.5\n", + "2510 79.5\n", + "2511 19.5\n", + "2512 80.2\n", + "2513 19.6\n", + "2514 19.4\n", + "2515 83.4\n", + "2516 80.9\n", + "2517 19.6\n", + "2518 19.6\n", + "2519 81.6\n", + "2520 19.5\n", + "2521 82.2\n", + "2522 19.4\n", + "2523 82.8\n", + "\n", + "[2524 rows x 1 columns]" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop_density = DataFrame(pop_density)\n", + "pop_density" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pop_density
021.3
191.9
221.6
391.3
421.5
\n", + "
" + ], + "text/plain": [ + " pop_density\n", + "0 21.3\n", + "1 91.9\n", + "2 21.6\n", + "3 91.3\n", + "4 21.5" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop_density.columns = ['pop_density']\n", + "pop_density.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
state/regionagesyearpopulationstatearea (sq. mi)pop_density
0ALunder1820121117489.0Alabama52423.021.3
1ALtotal20124817528.0Alabama52423.091.9
2ALunder1820101130966.0Alabama52423.021.6
3ALtotal20104785570.0Alabama52423.091.3
4ALunder1820111125763.0Alabama52423.021.5
\n", + "
" + ], + "text/plain": [ + " state/region ages year population state area (sq. mi) pop_density\n", + "0 AL under18 2012 1117489.0 Alabama 52423.0 21.3\n", + "1 AL total 2012 4817528.0 Alabama 52423.0 91.9\n", + "2 AL under18 2010 1130966.0 Alabama 52423.0 21.6\n", + "3 AL total 2010 4785570.0 Alabama 52423.0 91.3\n", + "4 AL under18 2011 1125763.0 Alabama 52423.0 21.5" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop4 = pop3.merge(pop_density,left_index=True,right_index=True)\n", + "pop4.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([2012, 2010, 2011, 2009, 2013, 2007, 2008, 2005, 2006, 2004, 2003,\n", + " 2001, 2002, 1999, 2000, 1998, 1997, 1996, 1995, 1994, 1993, 1992,\n", + " 1991, 1990], dtype=int64)" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop4['year'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['under18', 'total'], dtype=object)" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop4['ages'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
state/regionagesyearpopulationstatearea (sq. mi)pop_density
1ALtotal20124817528.0Alabama52423.091.9
95AKtotal2012730307.0Alaska656425.01.1
97AZtotal20126551149.0Arizona114006.057.5
191ARtotal20122949828.0Arkansas53182.055.5
193CAtotal201237999878.0California163707.0232.1
287COtotal20125189458.0Colorado104100.049.9
289CTtotal20123591765.0Connecticut5544.0647.9
383DEtotal2012917053.0Delaware1954.0469.3
385DCtotal2012633427.0District of Columbia68.09315.1
479FLtotal201219320749.0Florida65758.0293.8
480GAtotal20129915646.0Georgia59441.0166.8
575HItotal20121390090.0Hawaii10932.0127.2
576IDtotal20121595590.0Idaho83574.019.1
671ILtotal201212868192.0Illinois57918.0222.2
672INtotal20126537782.0Indiana36420.0179.5
767IAtotal20123075039.0Iowa56276.054.6
768KStotal20122885398.0Kansas82282.035.1
863KYtotal20124379730.0Kentucky40411.0108.4
864LAtotal20124602134.0Louisiana51843.088.8
959MEtotal20121328501.0Maine35387.037.5
960MDtotal20125884868.0Maryland12407.0474.3
1055MAtotal20126645303.0Massachusetts10555.0629.6
1056MItotal20129882519.0Michigan96810.0102.1
1151MNtotal20125379646.0Minnesota86943.061.9
1152MStotal20122986450.0Mississippi48434.061.7
1247MOtotal20126024522.0Missouri69709.086.4
1248MTtotal20121005494.0Montana147046.06.8
1343NEtotal20121855350.0Nebraska77358.024.0
1344NVtotal20122754354.0Nevada110567.024.9
1439NHtotal20121321617.0New Hampshire9351.0141.3
1440NJtotal20128867749.0New Jersey8722.01016.7
1535NMtotal20122083540.0New Mexico121593.017.1
1536NYtotal201219576125.0New York54475.0359.4
1631NCtotal20129748364.0North Carolina53821.0181.1
1632NDtotal2012701345.0North Dakota70704.09.9
1727OHtotal201211553031.0Ohio44828.0257.7
1728OKtotal20123815780.0Oklahoma69903.054.6
1823ORtotal20123899801.0Oregon98386.039.6
1824PAtotal201212764475.0Pennsylvania46058.0277.1
1919RItotal20121050304.0Rhode Island1545.0679.8
1920SCtotal20124723417.0South Carolina32007.0147.6
2015SDtotal2012834047.0South Dakota77121.010.8
2016TNtotal20126454914.0Tennessee42146.0153.2
2111TXtotal201226060796.0Texas268601.097.0
2112UTtotal20122854871.0Utah84904.033.6
2207VTtotal2012625953.0Vermont9615.065.1
2208VAtotal20128186628.0Virginia42769.0191.4
2303WAtotal20126895318.0Washington71303.096.7
2304WVtotal20121856680.0West Virginia24231.076.6
2399WItotal20125724554.0Wisconsin65503.087.4
2400WYtotal2012576626.0Wyoming97818.05.9
2475PRtotal20123651545.0Puerto Rico3515.01038.8
2523USAtotal2012313873685.0United State3790399.082.8
\n", + "
" + ], + "text/plain": [ + " state/region ages year population state area (sq. mi) pop_density\n", + "1 AL total 2012 4817528.0 Alabama 52423.0 91.9\n", + "95 AK total 2012 730307.0 Alaska 656425.0 1.1\n", + "97 AZ total 2012 6551149.0 Arizona 114006.0 57.5\n", + "191 AR total 2012 2949828.0 Arkansas 53182.0 55.5\n", + "193 CA total 2012 37999878.0 California 163707.0 232.1\n", + "287 CO total 2012 5189458.0 Colorado 104100.0 49.9\n", + "289 CT total 2012 3591765.0 Connecticut 5544.0 647.9\n", + "383 DE total 2012 917053.0 Delaware 1954.0 469.3\n", + "385 DC total 2012 633427.0 District of Columbia 68.0 9315.1\n", + "479 FL total 2012 19320749.0 Florida 65758.0 293.8\n", + "480 GA total 2012 9915646.0 Georgia 59441.0 166.8\n", + "575 HI total 2012 1390090.0 Hawaii 10932.0 127.2\n", + "576 ID total 2012 1595590.0 Idaho 83574.0 19.1\n", + "671 IL total 2012 12868192.0 Illinois 57918.0 222.2\n", + "672 IN total 2012 6537782.0 Indiana 36420.0 179.5\n", + "767 IA total 2012 3075039.0 Iowa 56276.0 54.6\n", + "768 KS total 2012 2885398.0 Kansas 82282.0 35.1\n", + "863 KY total 2012 4379730.0 Kentucky 40411.0 108.4\n", + "864 LA total 2012 4602134.0 Louisiana 51843.0 88.8\n", + "959 ME total 2012 1328501.0 Maine 35387.0 37.5\n", + "960 MD total 2012 5884868.0 Maryland 12407.0 474.3\n", + "1055 MA total 2012 6645303.0 Massachusetts 10555.0 629.6\n", + "1056 MI total 2012 9882519.0 Michigan 96810.0 102.1\n", + "1151 MN total 2012 5379646.0 Minnesota 86943.0 61.9\n", + "1152 MS total 2012 2986450.0 Mississippi 48434.0 61.7\n", + "1247 MO total 2012 6024522.0 Missouri 69709.0 86.4\n", + "1248 MT total 2012 1005494.0 Montana 147046.0 6.8\n", + "1343 NE total 2012 1855350.0 Nebraska 77358.0 24.0\n", + "1344 NV total 2012 2754354.0 Nevada 110567.0 24.9\n", + "1439 NH total 2012 1321617.0 New Hampshire 9351.0 141.3\n", + "1440 NJ total 2012 8867749.0 New Jersey 8722.0 1016.7\n", + "1535 NM total 2012 2083540.0 New Mexico 121593.0 17.1\n", + "1536 NY total 2012 19576125.0 New York 54475.0 359.4\n", + "1631 NC total 2012 9748364.0 North Carolina 53821.0 181.1\n", + "1632 ND total 2012 701345.0 North Dakota 70704.0 9.9\n", + "1727 OH total 2012 11553031.0 Ohio 44828.0 257.7\n", + "1728 OK total 2012 3815780.0 Oklahoma 69903.0 54.6\n", + "1823 OR total 2012 3899801.0 Oregon 98386.0 39.6\n", + "1824 PA total 2012 12764475.0 Pennsylvania 46058.0 277.1\n", + "1919 RI total 2012 1050304.0 Rhode Island 1545.0 679.8\n", + "1920 SC total 2012 4723417.0 South Carolina 32007.0 147.6\n", + "2015 SD total 2012 834047.0 South Dakota 77121.0 10.8\n", + "2016 TN total 2012 6454914.0 Tennessee 42146.0 153.2\n", + "2111 TX total 2012 26060796.0 Texas 268601.0 97.0\n", + "2112 UT total 2012 2854871.0 Utah 84904.0 33.6\n", + "2207 VT total 2012 625953.0 Vermont 9615.0 65.1\n", + "2208 VA total 2012 8186628.0 Virginia 42769.0 191.4\n", + "2303 WA total 2012 6895318.0 Washington 71303.0 96.7\n", + "2304 WV total 2012 1856680.0 West Virginia 24231.0 76.6\n", + "2399 WI total 2012 5724554.0 Wisconsin 65503.0 87.4\n", + "2400 WY total 2012 576626.0 Wyoming 97818.0 5.9\n", + "2475 PR total 2012 3651545.0 Puerto Rico 3515.0 1038.8\n", + "2523 USA total 2012 313873685.0 United State 3790399.0 82.8" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 查找2012年美国各州的全民人口数据\n", + "\n", + "# pandas非常强大的,可以像查询数据库一样进行数据查询\n", + "\n", + "pop5 = pop4.query(\"year == 2012 and ages == 'total'\")\n", + "pop5" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [], + "source": [ + "pop5.set_index(keys = 'state/region',inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agesyearpopulationstatearea (sq. mi)pop_density
state/region
AKtotal2012730307.0Alaska656425.01.1
WYtotal2012576626.0Wyoming97818.05.9
MTtotal20121005494.0Montana147046.06.8
NDtotal2012701345.0North Dakota70704.09.9
SDtotal2012834047.0South Dakota77121.010.8
NMtotal20122083540.0New Mexico121593.017.1
IDtotal20121595590.0Idaho83574.019.1
NEtotal20121855350.0Nebraska77358.024.0
NVtotal20122754354.0Nevada110567.024.9
UTtotal20122854871.0Utah84904.033.6
KStotal20122885398.0Kansas82282.035.1
MEtotal20121328501.0Maine35387.037.5
ORtotal20123899801.0Oregon98386.039.6
COtotal20125189458.0Colorado104100.049.9
IAtotal20123075039.0Iowa56276.054.6
OKtotal20123815780.0Oklahoma69903.054.6
ARtotal20122949828.0Arkansas53182.055.5
AZtotal20126551149.0Arizona114006.057.5
MStotal20122986450.0Mississippi48434.061.7
MNtotal20125379646.0Minnesota86943.061.9
VTtotal2012625953.0Vermont9615.065.1
WVtotal20121856680.0West Virginia24231.076.6
USAtotal2012313873685.0United State3790399.082.8
MOtotal20126024522.0Missouri69709.086.4
WItotal20125724554.0Wisconsin65503.087.4
LAtotal20124602134.0Louisiana51843.088.8
ALtotal20124817528.0Alabama52423.091.9
WAtotal20126895318.0Washington71303.096.7
TXtotal201226060796.0Texas268601.097.0
MItotal20129882519.0Michigan96810.0102.1
KYtotal20124379730.0Kentucky40411.0108.4
HItotal20121390090.0Hawaii10932.0127.2
NHtotal20121321617.0New Hampshire9351.0141.3
SCtotal20124723417.0South Carolina32007.0147.6
TNtotal20126454914.0Tennessee42146.0153.2
GAtotal20129915646.0Georgia59441.0166.8
INtotal20126537782.0Indiana36420.0179.5
NCtotal20129748364.0North Carolina53821.0181.1
VAtotal20128186628.0Virginia42769.0191.4
ILtotal201212868192.0Illinois57918.0222.2
CAtotal201237999878.0California163707.0232.1
OHtotal201211553031.0Ohio44828.0257.7
PAtotal201212764475.0Pennsylvania46058.0277.1
FLtotal201219320749.0Florida65758.0293.8
NYtotal201219576125.0New York54475.0359.4
DEtotal2012917053.0Delaware1954.0469.3
MDtotal20125884868.0Maryland12407.0474.3
MAtotal20126645303.0Massachusetts10555.0629.6
CTtotal20123591765.0Connecticut5544.0647.9
RItotal20121050304.0Rhode Island1545.0679.8
NJtotal20128867749.0New Jersey8722.01016.7
PRtotal20123651545.0Puerto Rico3515.01038.8
DCtotal2012633427.0District of Columbia68.09315.1
\n", + "
" + ], + "text/plain": [ + " ages year population state area (sq. mi) pop_density\n", + "state/region \n", + "AK total 2012 730307.0 Alaska 656425.0 1.1\n", + "WY total 2012 576626.0 Wyoming 97818.0 5.9\n", + "MT total 2012 1005494.0 Montana 147046.0 6.8\n", + "ND total 2012 701345.0 North Dakota 70704.0 9.9\n", + "SD total 2012 834047.0 South Dakota 77121.0 10.8\n", + "NM total 2012 2083540.0 New Mexico 121593.0 17.1\n", + "ID total 2012 1595590.0 Idaho 83574.0 19.1\n", + "NE total 2012 1855350.0 Nebraska 77358.0 24.0\n", + "NV total 2012 2754354.0 Nevada 110567.0 24.9\n", + "UT total 2012 2854871.0 Utah 84904.0 33.6\n", + "KS total 2012 2885398.0 Kansas 82282.0 35.1\n", + "ME total 2012 1328501.0 Maine 35387.0 37.5\n", + "OR total 2012 3899801.0 Oregon 98386.0 39.6\n", + "CO total 2012 5189458.0 Colorado 104100.0 49.9\n", + "IA total 2012 3075039.0 Iowa 56276.0 54.6\n", + "OK total 2012 3815780.0 Oklahoma 69903.0 54.6\n", + "AR total 2012 2949828.0 Arkansas 53182.0 55.5\n", + "AZ total 2012 6551149.0 Arizona 114006.0 57.5\n", + "MS total 2012 2986450.0 Mississippi 48434.0 61.7\n", + "MN total 2012 5379646.0 Minnesota 86943.0 61.9\n", + "VT total 2012 625953.0 Vermont 9615.0 65.1\n", + "WV total 2012 1856680.0 West Virginia 24231.0 76.6\n", + "USA total 2012 313873685.0 United State 3790399.0 82.8\n", + "MO total 2012 6024522.0 Missouri 69709.0 86.4\n", + "WI total 2012 5724554.0 Wisconsin 65503.0 87.4\n", + "LA total 2012 4602134.0 Louisiana 51843.0 88.8\n", + "AL total 2012 4817528.0 Alabama 52423.0 91.9\n", + "WA total 2012 6895318.0 Washington 71303.0 96.7\n", + "TX total 2012 26060796.0 Texas 268601.0 97.0\n", + "MI total 2012 9882519.0 Michigan 96810.0 102.1\n", + "KY total 2012 4379730.0 Kentucky 40411.0 108.4\n", + "HI total 2012 1390090.0 Hawaii 10932.0 127.2\n", + "NH total 2012 1321617.0 New Hampshire 9351.0 141.3\n", + "SC total 2012 4723417.0 South Carolina 32007.0 147.6\n", + "TN total 2012 6454914.0 Tennessee 42146.0 153.2\n", + "GA total 2012 9915646.0 Georgia 59441.0 166.8\n", + "IN total 2012 6537782.0 Indiana 36420.0 179.5\n", + "NC total 2012 9748364.0 North Carolina 53821.0 181.1\n", + "VA total 2012 8186628.0 Virginia 42769.0 191.4\n", + "IL total 2012 12868192.0 Illinois 57918.0 222.2\n", + "CA total 2012 37999878.0 California 163707.0 232.1\n", + "OH total 2012 11553031.0 Ohio 44828.0 257.7\n", + "PA total 2012 12764475.0 Pennsylvania 46058.0 277.1\n", + "FL total 2012 19320749.0 Florida 65758.0 293.8\n", + "NY total 2012 19576125.0 New York 54475.0 359.4\n", + "DE total 2012 917053.0 Delaware 1954.0 469.3\n", + "MD total 2012 5884868.0 Maryland 12407.0 474.3\n", + "MA total 2012 6645303.0 Massachusetts 10555.0 629.6\n", + "CT total 2012 3591765.0 Connecticut 5544.0 647.9\n", + "RI total 2012 1050304.0 Rhode Island 1545.0 679.8\n", + "NJ total 2012 8867749.0 New Jersey 8722.0 1016.7\n", + "PR total 2012 3651545.0 Puerto Rico 3515.0 1038.8\n", + "DC total 2012 633427.0 District of Columbia 68.0 9315.1" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop5.sort_values(by = 'pop_density')" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agesyearpopulationstatearea (sq. mi)pop_density
state/region
DCtotal2012633427.0District of Columbia68.09315.1
PRtotal20123651545.0Puerto Rico3515.01038.8
NJtotal20128867749.0New Jersey8722.01016.7
RItotal20121050304.0Rhode Island1545.0679.8
CTtotal20123591765.0Connecticut5544.0647.9
MAtotal20126645303.0Massachusetts10555.0629.6
MDtotal20125884868.0Maryland12407.0474.3
DEtotal2012917053.0Delaware1954.0469.3
NYtotal201219576125.0New York54475.0359.4
FLtotal201219320749.0Florida65758.0293.8
PAtotal201212764475.0Pennsylvania46058.0277.1
OHtotal201211553031.0Ohio44828.0257.7
CAtotal201237999878.0California163707.0232.1
ILtotal201212868192.0Illinois57918.0222.2
VAtotal20128186628.0Virginia42769.0191.4
NCtotal20129748364.0North Carolina53821.0181.1
INtotal20126537782.0Indiana36420.0179.5
GAtotal20129915646.0Georgia59441.0166.8
TNtotal20126454914.0Tennessee42146.0153.2
SCtotal20124723417.0South Carolina32007.0147.6
NHtotal20121321617.0New Hampshire9351.0141.3
HItotal20121390090.0Hawaii10932.0127.2
KYtotal20124379730.0Kentucky40411.0108.4
MItotal20129882519.0Michigan96810.0102.1
TXtotal201226060796.0Texas268601.097.0
WAtotal20126895318.0Washington71303.096.7
ALtotal20124817528.0Alabama52423.091.9
LAtotal20124602134.0Louisiana51843.088.8
WItotal20125724554.0Wisconsin65503.087.4
MOtotal20126024522.0Missouri69709.086.4
USAtotal2012313873685.0United State3790399.082.8
WVtotal20121856680.0West Virginia24231.076.6
VTtotal2012625953.0Vermont9615.065.1
MNtotal20125379646.0Minnesota86943.061.9
MStotal20122986450.0Mississippi48434.061.7
AZtotal20126551149.0Arizona114006.057.5
ARtotal20122949828.0Arkansas53182.055.5
OKtotal20123815780.0Oklahoma69903.054.6
IAtotal20123075039.0Iowa56276.054.6
COtotal20125189458.0Colorado104100.049.9
ORtotal20123899801.0Oregon98386.039.6
MEtotal20121328501.0Maine35387.037.5
KStotal20122885398.0Kansas82282.035.1
UTtotal20122854871.0Utah84904.033.6
NVtotal20122754354.0Nevada110567.024.9
NEtotal20121855350.0Nebraska77358.024.0
IDtotal20121595590.0Idaho83574.019.1
NMtotal20122083540.0New Mexico121593.017.1
SDtotal2012834047.0South Dakota77121.010.8
NDtotal2012701345.0North Dakota70704.09.9
MTtotal20121005494.0Montana147046.06.8
WYtotal2012576626.0Wyoming97818.05.9
AKtotal2012730307.0Alaska656425.01.1
\n", + "
" + ], + "text/plain": [ + " ages year population state area (sq. mi) pop_density\n", + "state/region \n", + "DC total 2012 633427.0 District of Columbia 68.0 9315.1\n", + "PR total 2012 3651545.0 Puerto Rico 3515.0 1038.8\n", + "NJ total 2012 8867749.0 New Jersey 8722.0 1016.7\n", + "RI total 2012 1050304.0 Rhode Island 1545.0 679.8\n", + "CT total 2012 3591765.0 Connecticut 5544.0 647.9\n", + "MA total 2012 6645303.0 Massachusetts 10555.0 629.6\n", + "MD total 2012 5884868.0 Maryland 12407.0 474.3\n", + "DE total 2012 917053.0 Delaware 1954.0 469.3\n", + "NY total 2012 19576125.0 New York 54475.0 359.4\n", + "FL total 2012 19320749.0 Florida 65758.0 293.8\n", + "PA total 2012 12764475.0 Pennsylvania 46058.0 277.1\n", + "OH total 2012 11553031.0 Ohio 44828.0 257.7\n", + "CA total 2012 37999878.0 California 163707.0 232.1\n", + "IL total 2012 12868192.0 Illinois 57918.0 222.2\n", + "VA total 2012 8186628.0 Virginia 42769.0 191.4\n", + "NC total 2012 9748364.0 North Carolina 53821.0 181.1\n", + "IN total 2012 6537782.0 Indiana 36420.0 179.5\n", + "GA total 2012 9915646.0 Georgia 59441.0 166.8\n", + "TN total 2012 6454914.0 Tennessee 42146.0 153.2\n", + "SC total 2012 4723417.0 South Carolina 32007.0 147.6\n", + "NH total 2012 1321617.0 New Hampshire 9351.0 141.3\n", + "HI total 2012 1390090.0 Hawaii 10932.0 127.2\n", + "KY total 2012 4379730.0 Kentucky 40411.0 108.4\n", + "MI total 2012 9882519.0 Michigan 96810.0 102.1\n", + "TX total 2012 26060796.0 Texas 268601.0 97.0\n", + "WA total 2012 6895318.0 Washington 71303.0 96.7\n", + "AL total 2012 4817528.0 Alabama 52423.0 91.9\n", + "LA total 2012 4602134.0 Louisiana 51843.0 88.8\n", + "WI total 2012 5724554.0 Wisconsin 65503.0 87.4\n", + "MO total 2012 6024522.0 Missouri 69709.0 86.4\n", + "USA total 2012 313873685.0 United State 3790399.0 82.8\n", + "WV total 2012 1856680.0 West Virginia 24231.0 76.6\n", + "VT total 2012 625953.0 Vermont 9615.0 65.1\n", + "MN total 2012 5379646.0 Minnesota 86943.0 61.9\n", + "MS total 2012 2986450.0 Mississippi 48434.0 61.7\n", + "AZ total 2012 6551149.0 Arizona 114006.0 57.5\n", + "AR total 2012 2949828.0 Arkansas 53182.0 55.5\n", + "OK total 2012 3815780.0 Oklahoma 69903.0 54.6\n", + "IA total 2012 3075039.0 Iowa 56276.0 54.6\n", + "CO total 2012 5189458.0 Colorado 104100.0 49.9\n", + "OR total 2012 3899801.0 Oregon 98386.0 39.6\n", + "ME total 2012 1328501.0 Maine 35387.0 37.5\n", + "KS total 2012 2885398.0 Kansas 82282.0 35.1\n", + "UT total 2012 2854871.0 Utah 84904.0 33.6\n", + "NV total 2012 2754354.0 Nevada 110567.0 24.9\n", + "NE total 2012 1855350.0 Nebraska 77358.0 24.0\n", + "ID total 2012 1595590.0 Idaho 83574.0 19.1\n", + "NM total 2012 2083540.0 New Mexico 121593.0 17.1\n", + "SD total 2012 834047.0 South Dakota 77121.0 10.8\n", + "ND total 2012 701345.0 North Dakota 70704.0 9.9\n", + "MT total 2012 1005494.0 Montana 147046.0 6.8\n", + "WY total 2012 576626.0 Wyoming 97818.0 5.9\n", + "AK total 2012 730307.0 Alaska 656425.0 1.1" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pop5.sort_values(by='pop_density',ascending=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Day76-90/code/cancer_predict.npy b/Day76-90/code/cancer_predict.npy new file mode 100644 index 0000000000000000000000000000000000000000..a6bf034d44f5d207081381c00677db8ce4bcd7da GIT binary patch literal 450 zcmbu2O-sZu5QfvLtH$rHKcKf(CRb1&+N=vS| zb9U~YxYnEXkHYUgme-TA@mL|aH>oo@rb5sLM}o5=ucWA%#DH`P=ZSW~AZP|X7O^7X ztin8R2MlYWfe?yeJ?tyYs2t~~u<-%?05<2a6~MMb401UG{ChBwO{w;F!k)^HE?}1+ zV$5MLfPIJPYg0JzegE!TRoBS1gv0pr367lZw;M1!9Ks+Sq-OWbKh)Y%ZJn02$;GtF Sw5GjfUrj7FO)lX$_I?1@n0Pq= literal 0 HcmV?d00001 diff --git a/Day76-90/code/state-abbrevs.csv b/Day76-90/code/state-abbrevs.csv new file mode 100644 index 0000000..6d4db36 --- /dev/null +++ b/Day76-90/code/state-abbrevs.csv @@ -0,0 +1,52 @@ +"state","abbreviation" +"Alabama","AL" +"Alaska","AK" +"Arizona","AZ" +"Arkansas","AR" +"California","CA" +"Colorado","CO" +"Connecticut","CT" +"Delaware","DE" +"District of Columbia","DC" +"Florida","FL" +"Georgia","GA" +"Hawaii","HI" +"Idaho","ID" +"Illinois","IL" +"Indiana","IN" +"Iowa","IA" +"Kansas","KS" +"Kentucky","KY" +"Louisiana","LA" +"Maine","ME" +"Montana","MT" +"Nebraska","NE" +"Nevada","NV" +"New Hampshire","NH" +"New Jersey","NJ" +"New Mexico","NM" +"New York","NY" +"North Carolina","NC" +"North Dakota","ND" +"Ohio","OH" +"Oklahoma","OK" +"Oregon","OR" +"Maryland","MD" +"Massachusetts","MA" +"Michigan","MI" +"Minnesota","MN" +"Mississippi","MS" +"Missouri","MO" +"Pennsylvania","PA" +"Rhode Island","RI" +"South Carolina","SC" +"South Dakota","SD" +"Tennessee","TN" +"Texas","TX" +"Utah","UT" +"Vermont","VT" +"Virginia","VA" +"Washington","WA" +"West Virginia","WV" +"Wisconsin","WI" +"Wyoming","WY" \ No newline at end of file diff --git a/Day76-90/code/state-areas.csv b/Day76-90/code/state-areas.csv new file mode 100644 index 0000000..322345c --- /dev/null +++ b/Day76-90/code/state-areas.csv @@ -0,0 +1,53 @@ +state,area (sq. mi) +Alabama,52423 +Alaska,656425 +Arizona,114006 +Arkansas,53182 +California,163707 +Colorado,104100 +Connecticut,5544 +Delaware,1954 +Florida,65758 +Georgia,59441 +Hawaii,10932 +Idaho,83574 +Illinois,57918 +Indiana,36420 +Iowa,56276 +Kansas,82282 +Kentucky,40411 +Louisiana,51843 +Maine,35387 +Maryland,12407 +Massachusetts,10555 +Michigan,96810 +Minnesota,86943 +Mississippi,48434 +Missouri,69709 +Montana,147046 +Nebraska,77358 +Nevada,110567 +New Hampshire,9351 +New Jersey,8722 +New Mexico,121593 +New York,54475 +North Carolina,53821 +North Dakota,70704 +Ohio,44828 +Oklahoma,69903 +Oregon,98386 +Pennsylvania,46058 +Rhode Island,1545 +South Carolina,32007 +South Dakota,77121 +Tennessee,42146 +Texas,268601 +Utah,84904 +Vermont,9615 +Virginia,42769 +Washington,71303 +West Virginia,24231 +Wisconsin,65503 +Wyoming,97818 +District of Columbia,68 +Puerto Rico,3515 diff --git a/Day76-90/code/state-population.csv b/Day76-90/code/state-population.csv new file mode 100644 index 0000000..c76110e --- /dev/null +++ b/Day76-90/code/state-population.csv @@ -0,0 +1,2545 @@ +state/region,ages,year,population +AL,under18,2012,1117489 +AL,total,2012,4817528 +AL,under18,2010,1130966 +AL,total,2010,4785570 +AL,under18,2011,1125763 +AL,total,2011,4801627 +AL,total,2009,4757938 +AL,under18,2009,1134192 +AL,under18,2013,1111481 +AL,total,2013,4833722 +AL,total,2007,4672840 +AL,under18,2007,1132296 +AL,total,2008,4718206 +AL,under18,2008,1134927 +AL,total,2005,4569805 +AL,under18,2005,1117229 +AL,total,2006,4628981 +AL,under18,2006,1126798 +AL,total,2004,4530729 +AL,under18,2004,1113662 +AL,total,2003,4503491 +AL,under18,2003,1113083 +AL,total,2001,4467634 +AL,under18,2001,1120409 +AL,total,2002,4480089 +AL,under18,2002,1116590 +AL,under18,1999,1121287 +AL,total,1999,4430141 +AL,total,2000,4452173 +AL,under18,2000,1122273 +AL,total,1998,4404701 +AL,under18,1998,1118252 +AL,under18,1997,1122893 +AL,total,1997,4367935 +AL,total,1996,4331103 +AL,total,1995,4296800 +AL,under18,1995,1110553 +AL,under18,1996,1112092 +AL,total,1994,4260229 +AL,total,1993,4214202 +AL,under18,1993,1085606 +AL,under18,1994,1097180 +AL,under18,1992,1072873 +AL,total,1992,4154014 +AL,total,1991,4099156 +AL,under18,1991,1060794 +AL,under18,1990,1050041 +AL,total,1990,4050055 +AK,total,1990,553290 +AK,under18,1990,177502 +AK,total,1992,588736 +AK,under18,1991,182180 +AK,under18,1992,184878 +AK,total,1994,603308 +AK,under18,1994,187439 +AK,total,1991,570193 +AK,total,1993,599434 +AK,under18,1993,187190 +AK,total,1995,604412 +AK,under18,1995,184990 +AK,total,1996,608569 +AK,under18,1996,185360 +AK,under18,1997,188280 +AK,under18,1998,192636 +AK,total,1998,619933 +AK,total,1997,612968 +AK,under18,1999,191422 +AK,total,1999,624779 +AK,total,2000,627963 +AK,under18,2000,190615 +AK,total,2001,633714 +AK,under18,2001,188771 +AK,total,2002,642337 +AK,under18,2002,188482 +AK,total,2003,648414 +AK,under18,2003,186843 +AK,total,2004,659286 +AK,under18,2004,186335 +AK,total,2005,666946 +AK,under18,2005,185304 +AK,total,2006,675302 +AK,under18,2006,185580 +AK,total,2007,680300 +AK,under18,2007,184344 +AK,total,2008,687455 +AK,under18,2008,183124 +AK,under18,2013,188132 +AK,total,2013,735132 +AK,total,2009,698895 +AK,under18,2009,186351 +AK,under18,2010,187902 +AK,total,2010,713868 +AK,under18,2011,188329 +AK,total,2011,723375 +AK,under18,2012,188162 +AK,total,2012,730307 +AZ,under18,2012,1617149 +AZ,total,2012,6551149 +AZ,under18,2011,1616353 +AZ,total,2011,6468796 +AZ,under18,2010,1628563 +AZ,total,2010,6408790 +AZ,under18,2013,1616814 +AZ,total,2013,6626624 +AZ,total,2009,6343154 +AZ,under18,2009,1627343 +AZ,total,2007,6167681 +AZ,under18,2007,1607895 +AZ,total,2008,6280362 +AZ,under18,2008,1628651 +AZ,total,2005,5839077 +AZ,under18,2005,1529168 +AZ,total,2006,6029141 +AZ,under18,2006,1574867 +AZ,total,2004,5652404 +AZ,under18,2004,1484454 +AZ,total,2003,5510364 +AZ,under18,2003,1453671 +AZ,total,2001,5273477 +AZ,under18,2001,1399015 +AZ,total,2002,5396255 +AZ,under18,2002,1427938 +AZ,under18,1999,1332396 +AZ,total,1999,5023823 +AZ,total,2000,5160586 +AZ,under18,2000,1373414 +AZ,total,1998,4883342 +AZ,under18,1998,1285794 +AZ,total,1997,4736990 +AZ,under18,1997,1237159 +AZ,under18,1996,1215285 +AZ,total,1996,4586940 +AZ,total,1995,4432499 +AZ,under18,1995,1173391 +AZ,total,1993,4065440 +AZ,under18,1993,1094233 +AZ,under18,1994,1119857 +AZ,total,1994,4245089 +AZ,under18,1992,1055572 +AZ,under18,1991,1028285 +AZ,total,1991,3788576 +AZ,total,1992,3915740 +AZ,under18,1990,1006040 +AZ,total,1990,3684097 +AR,under18,1990,620933 +AR,total,1990,2356586 +AR,total,1991,2383144 +AR,under18,1991,626212 +AR,under18,1992,638269 +AR,total,1992,2415984 +AR,under18,1994,653842 +AR,total,1994,2494019 +AR,total,1993,2456303 +AR,under18,1993,643474 +AR,under18,1995,667671 +AR,total,1995,2535399 +AR,under18,1996,677912 +AR,total,1996,2572109 +AR,under18,1998,683637 +AR,total,1997,2601091 +AR,under18,1997,680203 +AR,total,1998,2626289 +AR,total,2000,2678588 +AR,under18,2000,680378 +AR,under18,1999,681940 +AR,total,1999,2651860 +AR,total,2002,2705927 +AR,under18,2002,678798 +AR,total,2001,2691571 +AR,under18,2001,679606 +AR,total,2004,2749686 +AR,under18,2004,683166 +AR,total,2003,2724816 +AR,under18,2003,679579 +AR,total,2006,2821761 +AR,under18,2006,697842 +AR,total,2005,2781097 +AR,under18,2005,689787 +AR,total,2008,2874554 +AR,under18,2008,705725 +AR,total,2007,2848650 +AR,under18,2007,702737 +AR,total,2009,2896843 +AR,under18,2009,707886 +AR,under18,2013,709866 +AR,total,2013,2959373 +AR,under18,2011,710576 +AR,total,2011,2938506 +AR,under18,2010,711947 +AR,total,2010,2922280 +AR,under18,2012,710471 +AR,total,2012,2949828 +CA,under18,2012,9209007 +CA,total,2012,37999878 +CA,under18,2011,9252336 +CA,total,2011,37668681 +CA,under18,2010,9284094 +CA,total,2010,37333601 +CA,under18,2013,9174877 +CA,total,2013,38332521 +CA,total,2009,36961229 +CA,under18,2009,9294501 +CA,total,2007,36250311 +CA,under18,2007,9335620 +CA,total,2008,36604337 +CA,under18,2008,9321621 +CA,total,2005,35827943 +CA,under18,2005,9405565 +CA,total,2006,36021202 +CA,under18,2006,9370884 +CA,total,2003,35253159 +CA,under18,2003,9404594 +CA,total,2004,35574576 +CA,under18,2004,9418497 +CA,total,2001,34479458 +CA,under18,2001,9325466 +CA,total,2002,34871843 +CA,under18,2002,9365142 +CA,under18,1999,9207878 +CA,total,1999,33499204 +CA,total,2000,33987977 +CA,under18,2000,9267089 +CA,under18,1998,9163238 +CA,total,1998,32987675 +CA,under18,1997,9135359 +CA,total,1997,32486010 +CA,under18,1996,9079519 +CA,total,1996,32018834 +CA,total,1995,31696582 +CA,under18,1995,8920578 +CA,total,1993,31274928 +CA,under18,1993,8624810 +CA,under18,1994,8790058 +CA,total,1994,31484435 +CA,total,1991,30470736 +CA,under18,1991,8245605 +CA,under18,1992,8439647 +CA,total,1992,30974659 +CA,under18,1990,7980501 +CA,total,1990,29959515 +CO,total,1990,3307618 +CO,under18,1990,881640 +CO,total,1992,3495939 +CO,under18,1992,925577 +CO,under18,1991,896537 +CO,total,1991,3387119 +CO,total,1994,3724168 +CO,under18,1994,966412 +CO,under18,1993,947806 +CO,total,1993,3613734 +CO,under18,1995,984310 +CO,total,1995,3826653 +CO,total,1996,3919972 +CO,under18,1996,1003946 +CO,under18,1997,1030557 +CO,total,1997,4018293 +CO,total,1998,4116639 +CO,under18,1998,1060066 +CO,total,2000,4326921 +CO,under18,2000,1106676 +CO,total,1999,4226018 +CO,under18,1999,1083938 +CO,total,2002,4490406 +CO,under18,2002,1138273 +CO,total,2001,4425687 +CO,under18,2001,1126647 +CO,total,2004,4575013 +CO,under18,2004,1146369 +CO,total,2003,4528732 +CO,under18,2003,1144597 +CO,total,2006,4720423 +CO,under18,2006,1171832 +CO,total,2005,4631888 +CO,under18,2005,1156399 +CO,total,2008,4889730 +CO,under18,2008,1203289 +CO,total,2007,4803868 +CO,under18,2007,1189434 +CO,total,2009,4972195 +CO,under18,2009,1217213 +CO,under18,2013,1237932 +CO,total,2013,5268367 +CO,under18,2010,1226619 +CO,total,2010,5048196 +CO,under18,2011,1230178 +CO,total,2011,5118400 +CO,under18,2012,1232864 +CO,total,2012,5189458 +CT,under18,2012,794959 +CT,total,2012,3591765 +CT,under18,2011,805109 +CT,total,2011,3588948 +CT,under18,2010,814187 +CT,total,2010,3579210 +CT,under18,2013,785566 +CT,total,2013,3596080 +CT,total,2009,3561807 +CT,under18,2009,820839 +CT,total,2007,3527270 +CT,under18,2007,833484 +CT,total,2008,3545579 +CT,under18,2008,826626 +CT,total,2005,3506956 +CT,under18,2005,844034 +CT,total,2006,3517460 +CT,under18,2006,839372 +CT,total,2003,3484336 +CT,under18,2003,851115 +CT,total,2004,3496094 +CT,under18,2004,848979 +CT,total,2001,3432835 +CT,under18,2001,845850 +CT,total,2002,3458749 +CT,under18,2002,848877 +CT,total,1999,3386401 +CT,under18,1999,834654 +CT,total,2000,3411777 +CT,under18,2000,842242 +CT,under18,1998,824600 +CT,total,1998,3365352 +CT,total,1997,3349348 +CT,under18,1997,814373 +CT,under18,1996,811855 +CT,total,1996,3336685 +CT,total,1995,3324144 +CT,under18,1995,808623 +CT,total,1993,3309175 +CT,under18,1993,790749 +CT,under18,1994,801231 +CT,total,1994,3316121 +CT,under18,1991,766304 +CT,total,1991,3302895 +CT,under18,1992,777264 +CT,total,1992,3300712 +CT,total,1990,3291967 +CT,under18,1990,752666 +DE,under18,1990,165628 +DE,total,1990,669567 +DE,under18,1992,174166 +DE,total,1992,694927 +DE,total,1991,683080 +DE,under18,1991,169910 +DE,total,1994,717545 +DE,under18,1994,180833 +DE,total,1993,706378 +DE,under18,1993,176916 +DE,under18,1995,181736 +DE,total,1995,729735 +DE,total,1996,740978 +DE,under18,1996,184021 +DE,under18,1997,186607 +DE,total,1997,751487 +DE,total,1998,763335 +DE,under18,1998,189302 +DE,total,2000,786373 +DE,under18,2000,194914 +DE,total,1999,774990 +DE,under18,1999,192510 +DE,total,2002,806169 +DE,under18,2002,196946 +DE,total,2001,795699 +DE,under18,2001,196038 +DE,total,2004,830803 +DE,under18,2004,199631 +DE,total,2003,818003 +DE,under18,2003,198045 +DE,total,2006,859268 +DE,under18,2006,203729 +DE,total,2005,845150 +DE,under18,2005,201988 +DE,total,2008,883874 +DE,under18,2008,206116 +DE,total,2007,871749 +DE,under18,2007,205155 +DE,under18,2013,203558 +DE,total,2013,925749 +DE,total,2009,891730 +DE,under18,2009,206213 +DE,under18,2010,205478 +DE,total,2010,899711 +DE,under18,2011,204801 +DE,total,2011,907985 +DE,under18,2012,204586 +DE,total,2012,917053 +DC,under18,2012,107642 +DC,total,2012,633427 +DC,under18,2011,103906 +DC,total,2011,619624 +DC,under18,2010,101309 +DC,total,2010,605125 +DC,under18,2013,111474 +DC,total,2013,646449 +DC,total,2009,592228 +DC,under18,2009,102098 +DC,total,2007,574404 +DC,under18,2007,104126 +DC,total,2008,580236 +DC,under18,2008,102257 +DC,total,2005,567136 +DC,under18,2005,107187 +DC,total,2006,570681 +DC,under18,2006,105651 +DC,total,2003,568502 +DC,under18,2003,111403 +DC,total,2004,567754 +DC,under18,2004,109756 +DC,total,2001,574504 +DC,under18,2001,114625 +DC,total,2002,573158 +DC,under18,2002,113822 +DC,total,1999,570220 +DC,under18,1999,115003 +DC,total,2000,572046 +DC,under18,2000,114503 +DC,under18,1998,113839 +DC,total,1998,565232 +DC,under18,1997,119531 +DC,total,1997,567739 +DC,under18,1996,121210 +DC,total,1996,572379 +DC,total,1995,580519 +DC,under18,1995,123620 +DC,total,1993,595302 +DC,under18,1993,120471 +DC,under18,1994,122170 +DC,total,1994,589240 +DC,total,1991,600870 +DC,under18,1991,116825 +DC,under18,1992,118636 +DC,total,1992,597567 +DC,under18,1990,112632 +DC,total,1990,605321 +FL,total,1990,13033307 +FL,under18,1990,2988807 +FL,under18,1991,3045638 +FL,total,1991,13369798 +FL,total,1994,14239444 +FL,under18,1994,3299887 +FL,under18,1993,3214066 +FL,total,1993,13927185 +FL,total,1992,13650553 +FL,under18,1992,3120439 +FL,under18,1995,3366468 +FL,total,1995,14537875 +FL,total,1996,14853360 +FL,under18,1996,3431695 +FL,under18,1998,3557561 +FL,under18,1997,3502269 +FL,total,1997,15186304 +FL,total,1998,15486559 +FL,total,1999,15759421 +FL,under18,1999,3611711 +FL,total,2000,16047515 +FL,under18,2000,3654880 +FL,total,2001,16356966 +FL,under18,2001,3714439 +FL,total,2002,16689370 +FL,under18,2002,3774624 +FL,total,2003,17004085 +FL,under18,2003,3820876 +FL,total,2004,17415318 +FL,under18,2004,3890734 +FL,total,2005,17842038 +FL,under18,2005,3968178 +FL,total,2006,18166990 +FL,under18,2006,4022912 +FL,total,2007,18367842 +FL,under18,2007,4031098 +FL,total,2008,18527305 +FL,under18,2008,4018372 +FL,total,2009,18652644 +FL,under18,2009,3997283 +FL,under18,2013,4026674 +FL,total,2013,19552860 +FL,under18,2010,3999532 +FL,total,2010,18846054 +FL,under18,2011,4002550 +FL,total,2011,19083482 +FL,under18,2012,4012421 +FL,total,2012,19320749 +GA,total,2012,9915646 +GA,under18,2012,2487831 +GA,under18,2011,2488898 +GA,total,2011,9810181 +GA,under18,2010,2490884 +GA,total,2010,9713248 +GA,total,2013,9992167 +GA,total,2009,9620846 +GA,under18,2009,2485781 +GA,under18,2013,2489709 +GA,total,2007,9349988 +GA,under18,2007,2456249 +GA,total,2008,9504843 +GA,under18,2008,2479097 +GA,total,2005,8925922 +GA,under18,2005,2353604 +GA,total,2006,9155813 +GA,under18,2006,2406014 +GA,total,2003,8622793 +GA,under18,2003,2278710 +GA,total,2004,8769252 +GA,under18,2004,2308855 +GA,total,2001,8377038 +GA,under18,2001,2215390 +GA,total,2002,8508256 +GA,under18,2002,2249784 +GA,total,1999,8045965 +GA,under18,1999,2130698 +GA,total,2000,8227303 +GA,under18,2000,2176576 +GA,total,1997,7685099 +GA,under18,1997,2034163 +GA,under18,1998,2078998 +GA,total,1998,7863536 +GA,under18,1996,1993171 +GA,total,1996,7501069 +GA,total,1995,7328413 +GA,under18,1995,1949818 +GA,under18,1992,1817781 +GA,total,1992,6817203 +GA,total,1993,6978240 +GA,under18,1993,1865021 +GA,under18,1994,1906539 +GA,total,1994,7157165 +GA,total,1991,6653005 +GA,under18,1991,1773675 +GA,under18,1990,1747363 +GA,total,1990,6512602 +HI,under18,1990,279983 +HI,total,1990,1113491 +HI,total,1991,1136754 +HI,under18,1991,287871 +HI,under18,1994,307517 +HI,total,1994,1187536 +HI,total,1993,1172838 +HI,under18,1993,301473 +HI,under18,1992,295124 +HI,total,1992,1158613 +HI,total,1995,1196854 +HI,under18,1995,310325 +HI,under18,1996,311213 +HI,total,1996,1203755 +HI,under18,1998,304576 +HI,total,1998,1215233 +HI,total,1997,1211640 +HI,under18,1997,309465 +HI,total,2000,1213519 +HI,under18,2000,295352 +HI,total,1999,1210300 +HI,under18,1999,299680 +HI,total,2002,1239613 +HI,under18,2002,293600 +HI,total,2001,1225948 +HI,under18,2001,294133 +HI,total,2004,1273569 +HI,under18,2004,298103 +HI,total,2003,1251154 +HI,under18,2003,294519 +HI,total,2006,1309731 +HI,under18,2006,299313 +HI,total,2005,1292729 +HI,under18,2005,298497 +HI,total,2008,1332213 +HI,under18,2008,301094 +HI,total,2007,1315675 +HI,under18,2007,300207 +HI,under18,2013,307266 +HI,total,2009,1346717 +HI,under18,2009,302796 +HI,total,2013,1404054 +HI,total,2010,1363731 +HI,under18,2010,303812 +HI,total,2011,1376897 +HI,under18,2011,305396 +HI,under18,2012,305981 +HI,total,2012,1390090 +ID,total,2012,1595590 +ID,under18,2012,427177 +ID,under18,2011,428535 +ID,total,2011,1583930 +ID,under18,2010,428961 +ID,total,2010,1570718 +ID,total,2013,1612136 +ID,total,2009,1554439 +ID,under18,2009,426076 +ID,under18,2013,427781 +ID,total,2007,1505105 +ID,under18,2007,415024 +ID,total,2008,1534320 +ID,under18,2008,422347 +ID,total,2005,1428241 +ID,under18,2005,394651 +ID,total,2006,1468669 +ID,under18,2006,404753 +ID,total,2003,1363380 +ID,under18,2003,379241 +ID,total,2004,1391802 +ID,under18,2004,384692 +ID,total,2001,1319962 +ID,under18,2001,373145 +ID,total,2002,1340372 +ID,under18,2002,375986 +ID,total,1999,1275674 +ID,under18,1999,366689 +ID,total,2000,1299430 +ID,under18,2000,370430 +ID,total,1997,1228520 +ID,under18,1997,357779 +ID,under18,1998,362189 +ID,total,1998,1252330 +ID,under18,1996,353824 +ID,total,1996,1203083 +ID,total,1995,1177322 +ID,under18,1995,349248 +ID,under18,1992,324972 +ID,total,1992,1071685 +ID,total,1993,1108768 +ID,under18,1993,333838 +ID,under18,1994,344242 +ID,total,1994,1145140 +ID,total,1991,1041316 +ID,under18,1991,316732 +ID,under18,1990,313373 +ID,total,1990,1012384 +IL,under18,1990,2940837 +IL,total,1990,11453316 +IL,total,1991,11568964 +IL,under18,1991,2988715 +IL,under18,1994,3110938 +IL,total,1994,11912585 +IL,total,1993,11809579 +IL,under18,1993,3066541 +IL,under18,1992,3033427 +IL,total,1992,11694184 +IL,total,1995,12008437 +IL,under18,1995,3152984 +IL,under18,1996,3192916 +IL,total,1996,12101997 +IL,under18,1998,3225252 +IL,total,1998,12271847 +IL,total,1997,12185715 +IL,under18,1997,3222114 +IL,total,2000,12434161 +IL,under18,2000,3244944 +IL,total,1999,12359020 +IL,under18,1999,3240034 +IL,total,2002,12525556 +IL,under18,2002,3238362 +IL,total,2001,12488445 +IL,under18,2001,3243617 +IL,total,2004,12589773 +IL,under18,2004,3211599 +IL,total,2003,12556006 +IL,under18,2003,3225547 +IL,total,2006,12643955 +IL,under18,2006,3181246 +IL,total,2005,12609903 +IL,under18,2005,3197318 +IL,total,2008,12747038 +IL,under18,2008,3153401 +IL,total,2007,12695866 +IL,under18,2007,3170134 +IL,under18,2013,3023307 +IL,total,2009,12796778 +IL,under18,2009,3138406 +IL,total,2013,12882135 +IL,total,2010,12839695 +IL,under18,2010,3122092 +IL,total,2011,12855970 +IL,under18,2011,3089833 +IL,under18,2012,3057042 +IL,total,2012,12868192 +IN,total,2012,6537782 +IN,under18,2012,1589655 +IN,under18,2011,1598091 +IN,total,2011,6516336 +IN,under18,2010,1605883 +IN,total,2010,6489965 +IN,total,2013,6570902 +IN,total,2009,6459325 +IN,under18,2009,1609704 +IN,under18,2013,1586027 +IN,total,2007,6379599 +IN,under18,2007,1609494 +IN,total,2008,6424806 +IN,under18,2008,1611494 +IN,total,2005,6278616 +IN,under18,2005,1593898 +IN,total,2006,6332669 +IN,under18,2006,1603107 +IN,total,2003,6196638 +IN,under18,2003,1582560 +IN,total,2004,6233007 +IN,under18,2004,1586281 +IN,total,2001,6127760 +IN,under18,2001,1579527 +IN,total,2002,6155967 +IN,under18,2002,1580814 +IN,total,1999,6044970 +IN,under18,1999,1566079 +IN,total,2000,6091866 +IN,under18,2000,1574989 +IN,total,1997,5955267 +IN,under18,1997,1539270 +IN,under18,1998,1551960 +IN,total,1998,5998881 +IN,under18,1996,1517961 +IN,total,1996,5906013 +IN,total,1995,5851459 +IN,under18,1995,1507916 +IN,under18,1992,1461650 +IN,total,1992,5674547 +IN,total,1993,5739019 +IN,under18,1993,1473007 +IN,under18,1994,1491802 +IN,total,1994,5793526 +IN,total,1991,5616388 +IN,under18,1991,1450759 +IN,under18,1990,1437209 +IN,total,1990,5557798 +IA,under18,1990,719366 +IA,total,1990,2781018 +IA,total,1991,2797613 +IA,under18,1991,724446 +IA,under18,1994,728397 +IA,total,1994,2850746 +IA,total,1993,2836972 +IA,under18,1993,727751 +IA,under18,1992,724798 +IA,total,1992,2818401 +IA,total,1995,2867373 +IA,under18,1995,726961 +IA,under18,1996,729177 +IA,total,1996,2880001 +IA,under18,1998,729943 +IA,total,1998,2902872 +IA,total,1997,2891119 +IA,under18,1997,729806 +IA,total,2000,2929067 +IA,under18,2000,733337 +IA,total,1999,2917634 +IA,under18,1999,732671 +IA,total,2002,2934234 +IA,under18,2002,723685 +IA,total,2001,2931997 +IA,under18,2001,728601 +IA,total,2004,2953635 +IA,under18,2004,718708 +IA,total,2003,2941999 +IA,under18,2003,720102 +IA,total,2006,2982644 +IA,under18,2006,721703 +IA,total,2005,2964454 +IA,under18,2005,718488 +IA,total,2008,3016734 +IA,under18,2008,725658 +IA,total,2007,2999212 +IA,under18,2007,723632 +IA,under18,2013,724032 +IA,total,2009,3032870 +IA,under18,2009,726969 +IA,total,2013,3090416 +IA,total,2010,3050314 +IA,under18,2010,727717 +IA,total,2011,3064102 +IA,under18,2011,725522 +IA,under18,2012,723917 +IA,total,2012,3075039 +KS,total,2012,2885398 +KS,under18,2012,726668 +KS,under18,2011,726787 +KS,total,2011,2869548 +KS,under18,2010,727729 +KS,total,2010,2858910 +KS,total,2013,2893957 +KS,total,2009,2832704 +KS,under18,2009,721841 +KS,under18,2013,724092 +KS,total,2007,2783785 +KS,under18,2007,711005 +KS,total,2008,2808076 +KS,under18,2008,714689 +KS,total,2005,2745299 +KS,under18,2005,704689 +KS,total,2006,2762931 +KS,under18,2006,705277 +KS,total,2003,2723004 +KS,under18,2003,707847 +KS,total,2004,2734373 +KS,under18,2004,705456 +KS,total,2001,2702162 +KS,under18,2001,710923 +KS,total,2002,2713535 +KS,under18,2002,709416 +KS,total,1999,2678338 +KS,under18,1999,713022 +KS,total,2000,2693681 +KS,under18,2000,713887 +KS,total,1997,2635292 +KS,under18,1997,704001 +KS,under18,1998,710402 +KS,total,1998,2660598 +KS,under18,1996,696298 +KS,total,1996,2614554 +KS,total,1995,2601008 +KS,under18,1995,694124 +KS,under18,1992,680871 +KS,total,1992,2532395 +KS,total,1993,2556547 +KS,under18,1993,687262 +KS,under18,1994,693673 +KS,total,1994,2580513 +KS,total,1991,2498722 +KS,under18,1991,672033 +KS,under18,1990,662641 +KS,total,1990,2481349 +KY,under18,1990,945951 +KY,total,1990,3694048 +KY,total,1991,3722328 +KY,under18,1991,951512 +KY,under18,1994,981439 +KY,total,1994,3849088 +KY,total,1993,3812206 +KY,under18,1993,971134 +KY,under18,1992,963861 +KY,total,1992,3765469 +KY,total,1995,3887427 +KY,under18,1995,984486 +KY,under18,1996,987062 +KY,total,1996,3919536 +KY,under18,1998,997296 +KY,total,1998,3985391 +KY,total,1997,3952747 +KY,under18,1997,1002609 +KY,total,2000,4049021 +KY,under18,2000,994984 +KY,total,1999,4018053 +KY,under18,1999,996382 +KY,total,2002,4089875 +KY,under18,2002,995251 +KY,total,2001,4068132 +KY,under18,2001,994105 +KY,total,2004,4146101 +KY,under18,2004,998459 +KY,total,2003,4117170 +KY,under18,2003,998485 +KY,total,2006,4219239 +KY,under18,2006,1011295 +KY,total,2005,4182742 +KY,under18,2005,1004020 +KY,total,2008,4289878 +KY,under18,2008,1022001 +KY,total,2007,4256672 +KY,under18,2007,1016288 +KY,under18,2013,1014004 +KY,total,2009,4317074 +KY,under18,2009,1021710 +KY,total,2013,4395295 +KY,total,2010,4347698 +KY,under18,2010,1023679 +KY,total,2011,4366869 +KY,under18,2011,1021926 +KY,under18,2012,1017350 +KY,total,2012,4379730 +LA,total,2012,4602134 +LA,under18,2012,1114620 +LA,under18,2011,1116579 +LA,total,2011,4575197 +LA,under18,2010,1118576 +LA,total,2010,4545392 +LA,total,2013,4625470 +LA,total,2009,4491648 +LA,under18,2009,1114228 +LA,under18,2013,1112957 +LA,total,2007,4375581 +LA,under18,2007,1096642 +LA,total,2008,4435586 +LA,under18,2008,1108728 +LA,total,2005,4576628 +LA,under18,2005,1177954 +LA,total,2006,4302665 +LA,under18,2006,1078779 +LA,total,2003,4521042 +LA,under18,2003,1188070 +LA,total,2004,4552238 +LA,under18,2004,1182731 +LA,total,2001,4477875 +LA,under18,2001,1204187 +LA,total,2002,4497267 +LA,under18,2002,1194819 +LA,total,2000,4471885 +LA,under18,2000,1217670 +LA,total,1999,4460811 +LA,under18,1999,1227167 +LA,total,1997,4421072 +LA,under18,1997,1239665 +LA,under18,1998,1232984 +LA,total,1998,4440344 +LA,under18,1996,1244627 +LA,total,1996,4398877 +LA,total,1995,4378779 +LA,under18,1995,1250112 +LA,under18,1992,1237034 +LA,total,1992,4293003 +LA,total,1993,4316428 +LA,under18,1993,1239161 +LA,under18,1994,1247631 +LA,total,1994,4347481 +LA,total,1991,4253279 +LA,under18,1991,1222330 +LA,under18,1990,1205984 +LA,total,1990,4221532 +ME,under18,1990,308066 +ME,total,1990,1231719 +ME,total,1991,1237081 +ME,under18,1991,309871 +ME,under18,1994,311570 +ME,total,1994,1242662 +ME,total,1993,1242302 +ME,under18,1993,310966 +ME,under18,1992,310679 +ME,total,1992,1238508 +ME,total,1995,1243481 +ME,under18,1995,309173 +ME,under18,1996,307740 +ME,total,1996,1249060 +ME,under18,1998,304496 +ME,total,1998,1259127 +ME,total,1997,1254774 +ME,under18,1997,305097 +ME,total,1999,1266808 +ME,under18,1999,302321 +ME,total,2000,1277072 +ME,under18,2000,301407 +ME,total,2002,1295960 +ME,under18,2002,298595 +ME,total,2001,1285692 +ME,under18,2001,300088 +ME,total,2004,1313688 +ME,under18,2004,294791 +ME,total,2003,1306513 +ME,under18,2003,296786 +ME,total,2006,1323619 +ME,under18,2006,288945 +ME,total,2005,1318787 +ME,under18,2005,292039 +ME,total,2008,1330509 +ME,under18,2008,282204 +ME,total,2007,1327040 +ME,under18,2007,286185 +ME,under18,2013,261276 +ME,total,2009,1329590 +ME,under18,2009,277946 +ME,total,2013,1328302 +ME,total,2010,1327366 +ME,under18,2010,273061 +ME,total,2011,1327844 +ME,under18,2011,268737 +ME,under18,2012,264846 +ME,total,2012,1328501 +MD,total,2012,5884868 +MD,under18,2012,1346235 +MD,under18,2011,1348766 +MD,total,2011,5840241 +MD,under18,2010,1351983 +MD,total,2010,5787193 +MD,total,2013,5928814 +MD,total,2009,5730388 +MD,under18,2009,1353631 +MD,under18,2013,1344522 +MD,total,2007,5653408 +MD,under18,2007,1369563 +MD,total,2008,5684965 +MD,under18,2008,1359214 +MD,total,2005,5592379 +MD,under18,2005,1382966 +MD,total,2006,5627367 +MD,under18,2006,1377756 +MD,total,2003,5496269 +MD,under18,2003,1379641 +MD,total,2004,5546935 +MD,under18,2004,1383450 +MD,total,2001,5374691 +MD,under18,2001,1366552 +MD,total,2002,5440389 +MD,under18,2002,1375354 +MD,total,2000,5311034 +MD,under18,2000,1356961 +MD,total,1999,5254509 +MD,under18,1999,1348659 +MD,total,1997,5157328 +MD,under18,1997,1321306 +MD,under18,1998,1338727 +MD,total,1998,5204464 +MD,under18,1996,1303816 +MD,total,1996,5111986 +MD,total,1995,5070033 +MD,under18,1995,1300695 +MD,under18,1992,1235498 +MD,total,1992,4923369 +MD,total,1993,4971889 +MD,under18,1993,1261738 +MD,under18,1994,1280772 +MD,total,1994,5023060 +MD,total,1991,4867641 +MD,under18,1991,1208898 +MD,under18,1990,1180426 +MD,total,1990,4799770 +MA,under18,1990,1353806 +MA,total,1990,6022639 +MA,total,1991,6018470 +MA,under18,1991,1375110 +MA,under18,1994,1437069 +MA,total,1994,6095241 +MA,total,1993,6060569 +MA,under18,1993,1415724 +MA,under18,1992,1390188 +MA,total,1992,6028709 +MA,total,1995,6141445 +MA,under18,1995,1453489 +MA,under18,1996,1468614 +MA,total,1996,6179756 +MA,under18,1998,1491652 +MA,total,1998,6271838 +MA,total,1997,6226058 +MA,under18,1997,1478203 +MA,total,1999,6317345 +MA,under18,1999,1495818 +MA,total,2000,6361104 +MA,under18,2000,1501334 +MA,total,2001,6397634 +MA,under18,2001,1505028 +MA,total,2002,6417206 +MA,under18,2002,1502652 +MA,total,2004,6412281 +MA,under18,2004,1479541 +MA,total,2003,6422565 +MA,under18,2003,1493372 +MA,total,2006,6410084 +MA,under18,2006,1450202 +MA,total,2005,6403290 +MA,under18,2005,1464140 +MA,total,2008,6468967 +MA,under18,2008,1429727 +MA,total,2007,6431559 +MA,under18,2007,1439757 +MA,under18,2013,1393946 +MA,total,2009,6517613 +MA,under18,2009,1422935 +MA,total,2013,6692824 +MA,total,2010,6563263 +MA,under18,2010,1415962 +MA,total,2011,6606285 +MA,under18,2011,1407240 +MA,under18,2012,1399417 +MA,total,2012,6645303 +MI,total,2012,9882519 +MI,under18,2012,2269365 +MI,under18,2011,2299116 +MI,total,2011,9874589 +MI,under18,2010,2333121 +MI,total,2010,9876149 +MI,total,2013,9895622 +MI,total,2009,9901591 +MI,under18,2009,2372603 +MI,under18,2013,2245201 +MI,total,2007,10001284 +MI,under18,2007,2470063 +MI,total,2008,9946889 +MI,under18,2008,2418879 +MI,total,2005,10051137 +MI,under18,2005,2531839 +MI,total,2006,10036081 +MI,under18,2006,2503548 +MI,total,2003,10041152 +MI,under18,2003,2569080 +MI,total,2004,10055315 +MI,under18,2004,2553314 +MI,total,2002,10015710 +MI,under18,2002,2584310 +MI,total,2001,9991120 +MI,under18,2001,2593310 +MI,total,2000,9952450 +MI,under18,2000,2596114 +MI,total,1999,9897116 +MI,under18,1999,2591944 +MI,total,1997,9809051 +MI,under18,1997,2582270 +MI,under18,1998,2586343 +MI,total,1998,9847942 +MI,under18,1996,2569745 +MI,total,1996,9758645 +MI,total,1995,9676211 +MI,under18,1995,2556799 +MI,under18,1992,2501765 +MI,total,1992,9479065 +MI,total,1993,9540114 +MI,under18,1993,2522249 +MI,under18,1994,2535196 +MI,total,1994,9597737 +MI,total,1991,9400446 +MI,under18,1991,2484957 +MI,under18,1990,2459633 +MI,total,1990,9311319 +MN,under18,1990,1176680 +MN,total,1990,4389857 +MN,total,1991,4440859 +MN,under18,1991,1191207 +MN,under18,1994,1238949 +MN,total,1994,4610355 +MN,total,1993,4555956 +MN,under18,1993,1226723 +MN,under18,1992,1213068 +MN,total,1992,4495572 +MN,total,1995,4660180 +MN,under18,1995,1245932 +MN,under18,1996,1252722 +MN,total,1996,4712827 +MN,under18,1998,1275940 +MN,total,1998,4813412 +MN,total,1997,4763390 +MN,under18,1997,1264250 +MN,total,1999,4873481 +MN,under18,1999,1283102 +MN,total,2000,4933692 +MN,under18,2000,1289715 +MN,total,2001,4982796 +MN,under18,2001,1291261 +MN,total,2002,5018935 +MN,under18,2002,1288795 +MN,total,2004,5087713 +MN,under18,2004,1281946 +MN,total,2003,5053572 +MN,under18,2003,1283687 +MN,total,2006,5163555 +MN,under18,2006,1282381 +MN,total,2005,5119598 +MN,under18,2005,1280557 +MN,total,2008,5247018 +MN,under18,2008,1284179 +MN,total,2007,5207203 +MN,under18,2007,1285074 +MN,under18,2013,1279111 +MN,total,2009,5281203 +MN,under18,2009,1284103 +MN,total,2013,5420380 +MN,total,2010,5310337 +MN,under18,2010,1282693 +MN,total,2011,5347108 +MN,under18,2011,1280424 +MN,under18,2012,1278050 +MN,total,2012,5379646 +MS,total,2012,2986450 +MS,under18,2012,742941 +MS,under18,2011,747742 +MS,total,2011,2977886 +MS,under18,2010,754111 +MS,total,2010,2970047 +MS,total,2013,2991207 +MS,total,2009,2958774 +MS,under18,2009,758539 +MS,under18,2013,737432 +MS,total,2007,2928350 +MS,under18,2007,761171 +MS,total,2008,2947806 +MS,under18,2008,760572 +MS,total,2005,2905943 +MS,under18,2005,760870 +MS,total,2006,2904978 +MS,under18,2006,756990 +MS,total,2003,2868312 +MS,under18,2003,759447 +MS,total,2004,2889010 +MS,under18,2004,760410 +MS,total,2002,2858681 +MS,under18,2002,763148 +MS,total,2001,2852994 +MS,under18,2001,768418 +MS,total,2000,2848353 +MS,under18,2000,774353 +MS,total,1999,2828408 +MS,under18,1999,775662 +MS,total,1997,2777004 +MS,under18,1997,774832 +MS,under18,1998,773721 +MS,total,1998,2804834 +MS,under18,1996,769680 +MS,total,1996,2748085 +MS,total,1995,2722659 +MS,under18,1995,767892 +MS,under18,1992,750224 +MS,total,1992,2623734 +MS,total,1993,2655100 +MS,under18,1993,755820 +MS,under18,1994,763795 +MS,total,1994,2688992 +MS,total,1991,2598733 +MS,under18,1991,738911 +MS,under18,1990,733660 +MS,total,1990,2578897 +MO,under18,1990,1316423 +MO,total,1990,5128880 +MO,total,1991,5170800 +MO,under18,1991,1332306 +MO,under18,1994,1378700 +MO,total,1994,5324497 +MO,total,1993,5271175 +MO,under18,1993,1365903 +MO,under18,1992,1349729 +MO,total,1992,5217101 +MO,under18,1996,1408732 +MO,total,1996,5431553 +MO,total,1995,5378247 +MO,under18,1995,1393554 +MO,under18,1998,1428999 +MO,total,1998,5521765 +MO,total,1997,5481193 +MO,under18,1997,1419837 +MO,total,1999,5561948 +MO,under18,1999,1428047 +MO,total,2000,5607285 +MO,under18,2000,1428383 +MO,total,2001,5641142 +MO,under18,2001,1426575 +MO,total,2002,5674825 +MO,under18,2002,1424513 +MO,total,2004,5747741 +MO,under18,2004,1420956 +MO,total,2003,5709403 +MO,under18,2003,1421927 +MO,total,2006,5842704 +MO,under18,2006,1428324 +MO,total,2005,5790300 +MO,under18,2005,1422978 +MO,total,2008,5923916 +MO,under18,2008,1428945 +MO,total,2007,5887612 +MO,under18,2007,1431346 +MO,under18,2013,1397685 +MO,total,2009,5961088 +MO,under18,2009,1426603 +MO,total,2013,6044171 +MO,total,2010,5996063 +MO,under18,2010,1424042 +MO,total,2011,6010065 +MO,under18,2011,1414444 +MO,under18,2012,1405015 +MO,total,2012,6024522 +MT,total,2012,1005494 +MT,under18,2012,222905 +MT,under18,2011,222977 +MT,total,2011,997600 +MT,under18,2010,223292 +MT,total,2010,990527 +MT,total,2013,1015165 +MT,total,2009,983982 +MT,under18,2009,223675 +MT,under18,2013,223981 +MT,total,2007,964706 +MT,under18,2007,223135 +MT,total,2008,976415 +MT,under18,2008,223814 +MT,total,2005,940102 +MT,under18,2005,221685 +MT,total,2006,952692 +MT,under18,2006,221930 +MT,total,2003,919630 +MT,under18,2003,223012 +MT,total,2004,930009 +MT,under18,2004,221999 +MT,total,2002,911667 +MT,under18,2002,224772 +MT,total,2001,906961 +MT,under18,2001,227118 +MT,total,1999,897508 +MT,under18,1999,231133 +MT,total,2000,903773 +MT,under18,2000,230067 +MT,total,1997,889865 +MT,under18,1997,232813 +MT,under18,1998,231746 +MT,total,1998,892431 +MT,total,1995,876553 +MT,under18,1995,236583 +MT,under18,1996,235294 +MT,total,1996,886254 +MT,under18,1992,230868 +MT,total,1992,825770 +MT,total,1993,844761 +MT,under18,1993,234987 +MT,under18,1994,237289 +MT,total,1994,861306 +MT,total,1991,809680 +MT,under18,1991,225259 +MT,under18,1990,223677 +MT,total,1990,800204 +NE,under18,1990,430068 +NE,total,1990,1581660 +NE,total,1991,1595919 +NE,under18,1991,434525 +NE,under18,1994,442589 +NE,total,1994,1639041 +NE,total,1993,1625590 +NE,under18,1993,439313 +NE,under18,1992,436378 +NE,total,1992,1611687 +NE,under18,1996,446841 +NE,total,1996,1673740 +NE,total,1995,1656993 +NE,under18,1995,444418 +NE,under18,1998,451192 +NE,total,1998,1695817 +NE,total,1997,1686418 +NE,under18,1997,450076 +NE,total,1999,1704764 +NE,under18,1999,451047 +NE,total,2000,1713820 +NE,under18,2000,450380 +NE,total,2001,1719836 +NE,under18,2001,448307 +NE,total,2002,1728292 +NE,under18,2002,447714 +NE,total,2004,1749370 +NE,under18,2004,448360 +NE,total,2003,1738643 +NE,under18,2003,447444 +NE,total,2006,1772693 +NE,under18,2006,450098 +NE,total,2005,1761497 +NE,under18,2005,448918 +NE,total,2008,1796378 +NE,under18,2008,453787 +NE,total,2007,1783440 +NE,under18,2007,451946 +NE,under18,2013,464348 +NE,total,2009,1812683 +NE,under18,2009,456543 +NE,total,2013,1868516 +NE,total,2010,1829838 +NE,under18,2010,459621 +NE,total,2011,1841749 +NE,under18,2011,460872 +NE,under18,2012,462673 +NE,total,2012,1855350 +NV,total,2012,2754354 +NV,under18,2012,659655 +NV,under18,2011,659236 +NV,total,2011,2717951 +NV,under18,2010,663180 +NV,total,2010,2703230 +NV,total,2013,2790136 +NV,total,2009,2684665 +NV,under18,2009,666041 +NV,under18,2013,661605 +NV,total,2007,2601072 +NV,under18,2007,654053 +NV,total,2008,2653630 +NV,under18,2008,662621 +NV,total,2005,2432143 +NV,under18,2005,611595 +NV,total,2006,2522658 +NV,under18,2006,634403 +NV,total,2003,2248850 +NV,under18,2003,568963 +NV,total,2004,2346222 +NV,under18,2004,591314 +NV,total,2002,2173791 +NV,under18,2002,552816 +NV,total,2001,2098399 +NV,under18,2001,534708 +NV,total,1999,1934718 +NV,under18,1999,493701 +NV,total,2000,2018741 +NV,under18,2000,516018 +NV,total,1997,1764104 +NV,under18,1997,443626 +NV,under18,1998,469424 +NV,total,1998,1853192 +NV,total,1995,1581578 +NV,under18,1995,396223 +NV,under18,1996,419133 +NV,total,1996,1666320 +NV,under18,1992,337396 +NV,total,1992,1351367 +NV,total,1993,1411215 +NV,under18,1993,354990 +NV,under18,1994,376745 +NV,total,1994,1499298 +NV,total,1991,1296172 +NV,under18,1991,325033 +NV,under18,1990,316406 +NV,total,1990,1220695 +NH,under18,1990,277454 +NH,total,1990,1112384 +NH,total,1991,1109929 +NH,under18,1991,281127 +NH,under18,1994,295563 +NH,total,1994,1142561 +NH,total,1993,1129458 +NH,under18,1993,290409 +NH,under18,1992,286314 +NH,total,1992,1117785 +NH,under18,1996,300161 +NH,total,1996,1174719 +NH,total,1995,1157561 +NH,under18,1995,298246 +NH,under18,1998,307292 +NH,total,1998,1205941 +NH,total,1997,1189425 +NH,under18,1997,302834 +NH,total,2000,1239882 +NH,under18,2000,310352 +NH,total,1999,1222015 +NH,under18,1999,308423 +NH,total,2001,1255517 +NH,under18,2001,311877 +NH,total,2002,1269089 +NH,under18,2002,312743 +NH,total,2004,1290121 +NH,under18,2004,309243 +NH,total,2003,1279840 +NH,under18,2003,311412 +NH,total,2005,1298492 +NH,under18,2005,307403 +NH,total,2006,1308389 +NH,under18,2006,305169 +NH,total,2008,1315906 +NH,under18,2008,296029 +NH,total,2007,1312540 +NH,under18,2007,300918 +NH,under18,2013,271122 +NH,total,2009,1316102 +NH,under18,2009,290850 +NH,total,2013,1323459 +NH,total,2010,1316614 +NH,under18,2010,285702 +NH,total,2011,1318075 +NH,under18,2011,280486 +NH,under18,2012,275818 +NH,total,2012,1321617 +NJ,total,2012,8867749 +NJ,under18,2012,2035106 +NJ,under18,2011,2049453 +NJ,total,2011,8836639 +NJ,under18,2010,2062013 +NJ,total,2010,8802707 +NJ,total,2013,8899339 +NJ,total,2009,8755602 +NJ,under18,2009,2068684 +NJ,under18,2013,2022117 +NJ,total,2007,8677885 +NJ,under18,2007,2091023 +NJ,total,2008,8711090 +NJ,under18,2008,2076366 +NJ,total,2006,8661679 +NJ,under18,2006,2106403 +NJ,total,2005,8651974 +NJ,under18,2005,2121878 +NJ,total,2003,8601402 +NJ,under18,2003,2126014 +NJ,total,2004,8634561 +NJ,under18,2004,2129051 +NJ,total,2002,8552643 +NJ,under18,2002,2116591 +NJ,total,2001,8492671 +NJ,under18,2001,2102838 +NJ,total,1999,8359592 +NJ,under18,1999,2066678 +NJ,total,2000,8430621 +NJ,under18,2000,2088885 +NJ,total,1997,8218808 +NJ,under18,1997,2028349 +NJ,under18,1998,2042080 +NJ,total,1998,8287418 +NJ,total,1995,8083242 +NJ,under18,1995,1997187 +NJ,under18,1996,2016502 +NJ,total,1996,8149596 +NJ,under18,1992,1890108 +NJ,total,1992,7880508 +NJ,total,1993,7948915 +NJ,under18,1993,1928623 +NJ,under18,1994,1968232 +NJ,total,1994,8014306 +NJ,total,1991,7814676 +NJ,under18,1991,1849605 +NJ,under18,1990,1818187 +NJ,total,1990,7762963 +NM,total,1990,1521574 +NM,under18,1990,453538 +NM,under18,1991,461811 +NM,total,1991,1555305 +NM,under18,1994,497542 +NM,under18,1993,487742 +NM,total,1993,1636453 +NM,total,1992,1595442 +NM,under18,1992,473176 +NM,total,1994,1682398 +NM,under18,1996,508100 +NM,total,1995,1720394 +NM,under18,1995,504558 +NM,total,1996,1752326 +NM,under18,1998,512801 +NM,total,1998,1793484 +NM,total,1997,1774839 +NM,under18,1997,514500 +NM,under18,1999,511135 +NM,total,1999,1808082 +NM,total,2000,1821204 +NM,under18,2000,508132 +NM,total,2001,1831690 +NM,under18,2001,503404 +NM,total,2002,1855309 +NM,under18,2002,502779 +NM,total,2004,1903808 +NM,under18,2004,501184 +NM,total,2003,1877574 +NM,under18,2003,500777 +NM,total,2005,1932274 +NM,under18,2005,502612 +NM,total,2006,1962137 +NM,under18,2006,505125 +NM,total,2008,2010662 +NM,under18,2008,511214 +NM,total,2007,1990070 +NM,under18,2007,508725 +NM,under18,2013,507540 +NM,total,2013,2085287 +NM,total,2009,2036802 +NM,under18,2009,515470 +NM,total,2010,2064982 +NM,under18,2010,518763 +NM,under18,2011,516513 +NM,total,2011,2077919 +NM,under18,2012,512314 +NM,total,2012,2083540 +NY,total,2012,19576125 +NY,under18,2012,4264694 +NY,total,2011,19502728 +NY,under18,2011,4294555 +NY,under18,2010,4318033 +NY,total,2010,19398228 +NY,total,2009,19307066 +NY,under18,2009,4342926 +NY,total,2013,19651127 +NY,under18,2013,4239976 +NY,total,2007,19132335 +NY,under18,2007,4410949 +NY,total,2008,19212436 +NY,under18,2008,4372170 +NY,total,2006,19104631 +NY,under18,2006,4457777 +NY,total,2005,19132610 +NY,under18,2005,4514456 +NY,total,2003,19175939 +NY,under18,2003,4619506 +NY,total,2004,19171567 +NY,under18,2004,4574065 +NY,total,2002,19137800 +NY,under18,2002,4652232 +NY,total,2001,19082838 +NY,under18,2001,4672425 +NY,under18,1999,4672587 +NY,total,1999,18882725 +NY,total,2000,19001780 +NY,under18,2000,4687374 +NY,under18,1997,4670787 +NY,total,1997,18656546 +NY,total,1998,18755906 +NY,under18,1998,4652140 +NY,total,1996,18588460 +NY,under18,1995,4648419 +NY,total,1995,18524104 +NY,under18,1996,4667647 +NY,total,1994,18459470 +NY,under18,1992,4465539 +NY,total,1992,18246653 +NY,total,1993,18374954 +NY,under18,1993,4538171 +NY,under18,1994,4605284 +NY,total,1991,18122510 +NY,under18,1991,4372727 +NY,under18,1990,4281643 +NY,total,1990,18020784 +NC,under18,1990,1625804 +NC,total,1990,6664016 +NC,total,1991,6784280 +NC,under18,1991,1640394 +NC,total,1993,7042818 +NC,under18,1993,1710267 +NC,under18,1992,1674144 +NC,total,1992,6897214 +NC,under18,1994,1750754 +NC,total,1994,7187398 +NC,total,1995,7344674 +NC,under18,1995,1785150 +NC,under18,1996,1821506 +NC,total,1996,7500670 +NC,under18,1998,1894753 +NC,total,1998,7809122 +NC,total,1997,7656825 +NC,under18,1997,1861621 +NC,total,2000,8081614 +NC,under18,2000,1967626 +NC,total,1999,7949362 +NC,under18,1999,1932141 +NC,total,2001,8210122 +NC,under18,2001,2003782 +NC,total,2002,8326201 +NC,under18,2002,2034451 +NC,total,2004,8553152 +NC,under18,2004,2085165 +NC,total,2003,8422501 +NC,under18,2003,2060838 +NC,total,2005,8705407 +NC,under18,2005,2122485 +NC,total,2006,8917270 +NC,under18,2006,2166393 +NC,total,2008,9309449 +NC,under18,2008,2252101 +NC,total,2007,9118037 +NC,under18,2007,2219168 +NC,under18,2013,2285605 +NC,total,2013,9848060 +NC,total,2009,9449566 +NC,under18,2009,2272955 +NC,total,2010,9559533 +NC,under18,2010,2282288 +NC,under18,2011,2284238 +NC,total,2011,9651377 +NC,under18,2012,2284122 +NC,total,2012,9748364 +ND,total,2012,701345 +ND,under18,2012,156765 +ND,total,2011,684867 +ND,under18,2011,152357 +ND,under18,2010,150179 +ND,total,2010,674344 +ND,total,2009,664968 +ND,under18,2009,148674 +ND,total,2013,723393 +ND,under18,2013,162688 +ND,total,2007,652822 +ND,under18,2007,147263 +ND,total,2008,657569 +ND,under18,2008,147462 +ND,total,2006,649422 +ND,under18,2006,147331 +ND,total,2005,646089 +ND,under18,2005,148119 +ND,total,2003,638817 +ND,under18,2003,150406 +ND,total,2004,644705 +ND,under18,2004,149128 +ND,total,2002,638168 +ND,under18,2002,153097 +ND,total,2001,639062 +ND,under18,2001,156113 +ND,total,1999,644259 +ND,under18,1999,163056 +ND,total,2000,642023 +ND,under18,2000,160477 +ND,total,1997,649716 +ND,under18,1997,167475 +ND,under18,1998,165448 +ND,total,1998,647532 +ND,under18,1996,169257 +ND,total,1996,650382 +ND,total,1995,647832 +ND,under18,1995,171146 +ND,under18,1994,172160 +ND,total,1994,644806 +ND,under18,1992,172052 +ND,total,1992,638223 +ND,total,1993,641216 +ND,under18,1993,172168 +ND,total,1991,635753 +ND,under18,1991,171730 +ND,under18,1990,170920 +ND,total,1990,637685 +OH,under18,1990,2778491 +OH,total,1990,10864162 +OH,total,1991,10945762 +OH,under18,1991,2806959 +OH,total,1993,11101140 +OH,under18,1993,2855785 +OH,under18,1992,2839356 +OH,total,1992,11029431 +OH,under18,1994,2875397 +OH,total,1994,11152455 +OH,total,1995,11202751 +OH,under18,1995,2879930 +OH,under18,1996,2883443 +OH,total,1996,11242827 +OH,under18,1998,2896255 +OH,total,1998,11311536 +OH,total,1997,11277357 +OH,under18,1997,2897375 +OH,total,2000,11363543 +OH,under18,2000,2886585 +OH,total,1999,11335454 +OH,under18,1999,2893270 +OH,total,2001,11387404 +OH,under18,2001,2878123 +OH,total,2002,11407889 +OH,under18,2002,2865674 +OH,total,2004,11452251 +OH,under18,2004,2836068 +OH,total,2003,11434788 +OH,under18,2003,2849573 +OH,total,2005,11463320 +OH,under18,2005,2819794 +OH,total,2006,11481213 +OH,under18,2006,2804828 +OH,total,2008,11515391 +OH,under18,2008,2768968 +OH,total,2007,11500468 +OH,under18,2007,2790347 +OH,under18,2013,2649830 +OH,total,2013,11570808 +OH,total,2009,11528896 +OH,under18,2009,2748051 +OH,total,2010,11545435 +OH,under18,2010,2722589 +OH,under18,2011,2693469 +OH,total,2011,11549772 +OH,under18,2012,2668125 +OH,total,2012,11553031 +OK,total,2012,3815780 +OK,under18,2012,939911 +OK,total,2011,3785534 +OK,under18,2011,935714 +OK,under18,2010,931483 +OK,total,2010,3759263 +OK,total,2009,3717572 +OK,under18,2009,922711 +OK,total,2013,3850568 +OK,under18,2013,947027 +OK,total,2007,3634349 +OK,under18,2007,904328 +OK,total,2008,3668976 +OK,under18,2008,910617 +OK,total,2006,3594090 +OK,under18,2006,894761 +OK,total,2005,3548597 +OK,under18,2005,885316 +OK,total,2003,3504892 +OK,under18,2003,883959 +OK,total,2004,3525233 +OK,under18,2004,881606 +OK,total,2002,3489080 +OK,under18,2002,884961 +OK,total,2001,3467100 +OK,under18,2001,885218 +OK,total,1999,3437148 +OK,under18,1999,895678 +OK,total,2000,3454365 +OK,under18,2000,891847 +OK,total,1997,3372918 +OK,under18,1997,893835 +OK,under18,1998,898501 +OK,total,1998,3405194 +OK,under18,1996,887093 +OK,total,1996,3340129 +OK,total,1995,3308208 +OK,under18,1995,883667 +OK,under18,1994,877803 +OK,total,1994,3280940 +OK,under18,1992,862548 +OK,total,1992,3220517 +OK,total,1993,3252285 +OK,under18,1993,870137 +OK,total,1991,3175440 +OK,under18,1991,849639 +OK,under18,1990,841715 +OK,total,1990,3148825 +OR,under18,1990,742436 +OR,total,1990,2860375 +OR,total,1991,2928507 +OR,under18,1991,752442 +OR,total,1993,3060367 +OR,under18,1993,778973 +OR,under18,1992,770191 +OR,total,1992,2991755 +OR,under18,1994,793435 +OR,total,1994,3121264 +OR,total,1995,3184369 +OR,under18,1995,806512 +OR,under18,1996,816102 +OR,total,1996,3247111 +OR,under18,1998,837928 +OR,total,1998,3352449 +OR,total,1997,3304310 +OR,under18,1997,830002 +OR,total,2000,3429708 +OR,under18,2000,847511 +OR,total,1999,3393941 +OR,under18,1999,843484 +OR,total,2001,3467937 +OR,under18,2001,848663 +OR,total,2002,3513424 +OR,under18,2002,850733 +OR,total,2004,3569463 +OR,under18,2004,846786 +OR,total,2003,3547376 +OR,under18,2003,850251 +OR,total,2005,3613202 +OR,under18,2005,849323 +OR,total,2006,3670883 +OR,under18,2006,857003 +OR,total,2008,3768748 +OR,under18,2008,865664 +OR,total,2007,3722417 +OR,under18,2007,862161 +OR,under18,2013,857606 +OR,total,2013,3930065 +OR,total,2009,3808600 +OR,under18,2009,866194 +OR,total,2010,3837208 +OR,under18,2010,865129 +OR,under18,2011,862518 +OR,total,2011,3867937 +OR,under18,2012,859910 +OR,total,2012,3899801 +PA,total,2012,12764475 +PA,under18,2012,2737905 +PA,total,2011,12741310 +PA,under18,2011,2761343 +PA,under18,2010,2785316 +PA,total,2010,12710472 +PA,total,2009,12666858 +PA,under18,2009,2804929 +PA,total,2013,12773801 +PA,under18,2013,2715645 +PA,total,2007,12563937 +PA,under18,2007,2839574 +PA,total,2008,12612285 +PA,under18,2008,2821004 +PA,total,2006,12510809 +PA,under18,2006,2850778 +PA,total,2005,12449990 +PA,under18,2005,2859793 +PA,total,2003,12374658 +PA,under18,2003,2883270 +PA,total,2004,12410722 +PA,under18,2004,2873125 +PA,total,2002,12331031 +PA,under18,2002,2894935 +PA,total,2001,12298970 +PA,under18,2001,2905836 +PA,total,1999,12263805 +PA,under18,1999,2930193 +PA,total,2000,12284173 +PA,under18,2000,2918850 +PA,total,1997,12227814 +PA,under18,1997,2942240 +PA,under18,1998,2940285 +PA,total,1998,12245672 +PA,under18,1996,2937411 +PA,total,1996,12220464 +PA,total,1995,12198403 +PA,under18,1995,2941531 +PA,under18,1994,2932851 +PA,total,1994,12166050 +PA,under18,1992,2873013 +PA,total,1992,12049450 +PA,total,1993,12119724 +PA,under18,1993,2907351 +PA,total,1991,11982164 +PA,under18,1991,2830059 +PA,under18,1990,2799168 +PA,total,1990,11903299 +RI,under18,1990,225923 +RI,total,1990,1005995 +RI,total,1991,1010649 +RI,under18,1991,229448 +RI,total,1993,1015113 +RI,under18,1993,237218 +RI,under18,1992,232630 +RI,total,1992,1012581 +RI,under18,1994,239100 +RI,total,1994,1015960 +RI,total,1995,1017002 +RI,under18,1995,240553 +RI,under18,1996,240569 +RI,total,1996,1020893 +RI,under18,1998,241760 +RI,total,1998,1031155 +RI,total,1997,1025353 +RI,under18,1997,242079 +RI,total,2000,1050268 +RI,under18,2000,248065 +RI,total,1999,1040402 +RI,under18,1999,247014 +RI,total,2001,1057142 +RI,under18,2001,248296 +RI,total,2002,1065995 +RI,under18,2002,248690 +RI,total,2004,1074579 +RI,under18,2004,246228 +RI,total,2003,1071342 +RI,under18,2003,248075 +RI,total,2005,1067916 +RI,under18,2005,241932 +RI,total,2006,1063096 +RI,under18,2006,237348 +RI,total,2008,1055003 +RI,under18,2008,229798 +RI,total,2007,1057315 +RI,under18,2007,233655 +RI,under18,2013,213987 +RI,total,2013,1051511 +RI,total,2009,1053646 +RI,under18,2009,225902 +RI,total,2010,1052669 +RI,under18,2010,223088 +RI,under18,2011,219783 +RI,total,2011,1050350 +RI,under18,2012,216591 +RI,total,2012,1050304 +SC,total,2012,4723417 +SC,under18,2012,1077455 +SC,total,2011,4673509 +SC,under18,2011,1076524 +SC,under18,2010,1079978 +SC,total,2010,4636361 +SC,total,2009,4589872 +SC,under18,2009,1079729 +SC,total,2013,4774839 +SC,under18,2013,1079798 +SC,total,2007,4444110 +SC,under18,2007,1064190 +SC,total,2008,4528996 +SC,under18,2008,1074116 +SC,total,2006,4357847 +SC,under18,2006,1050042 +SC,total,2005,4270150 +SC,under18,2005,1036941 +SC,total,2003,4150297 +SC,under18,2003,1023785 +SC,total,2004,4210921 +SC,under18,2004,1029111 +SC,total,2002,4107795 +SC,under18,2002,1020531 +SC,total,2001,4064995 +SC,under18,2001,1016134 +SC,total,1999,3974682 +SC,under18,1999,1007050 +SC,total,2000,4024223 +SC,under18,2000,1010641 +SC,total,1997,3859696 +SC,under18,1997,1001681 +SC,under18,1998,1006371 +SC,total,1998,3919235 +SC,under18,1996,987576 +SC,total,1996,3796200 +SC,total,1995,3748582 +SC,under18,1995,975884 +SC,under18,1994,969766 +SC,total,1994,3705397 +SC,under18,1992,947868 +SC,total,1992,3620464 +SC,total,1993,3663314 +SC,under18,1993,956951 +SC,total,1991,3570404 +SC,under18,1991,936122 +SC,under18,1990,921041 +SC,total,1990,3501155 +SD,under18,1990,199453 +SD,total,1990,697101 +SD,total,1991,703669 +SD,under18,1991,201749 +SD,total,1993,722160 +SD,under18,1993,207975 +SD,under18,1992,206632 +SD,total,1992,712801 +SD,under18,1994,208443 +SD,total,1994,730790 +SD,total,1995,737926 +SD,under18,1995,207890 +SD,under18,1996,205780 +SD,total,1996,742214 +SD,under18,1998,204786 +SD,total,1998,746059 +SD,total,1997,744223 +SD,under18,1997,205978 +SD,total,2000,755844 +SD,under18,2000,202681 +SD,total,1999,750413 +SD,under18,1999,203737 +SD,total,2001,757972 +SD,under18,2001,200795 +SD,total,2002,760020 +SD,under18,2002,198694 +SD,total,2004,770396 +SD,under18,2004,196804 +SD,total,2003,763729 +SD,under18,2003,197326 +SD,total,2005,775493 +SD,under18,2005,196476 +SD,total,2006,783033 +SD,under18,2006,197332 +SD,total,2008,799124 +SD,under18,2008,199848 +SD,total,2007,791623 +SD,under18,2007,198847 +SD,under18,2013,207959 +SD,total,2013,844877 +SD,total,2009,807067 +SD,under18,2009,201204 +SD,total,2010,816211 +SD,under18,2010,203145 +SD,under18,2011,203948 +SD,total,2011,823772 +SD,under18,2012,205298 +SD,total,2012,834047 +TN,total,2012,6454914 +TN,under18,2012,1492689 +TN,total,2011,6398361 +TN,under18,2011,1491837 +TN,under18,2010,1495090 +TN,total,2010,6356683 +TN,total,2009,6306019 +TN,under18,2009,1494687 +TN,total,2013,6495978 +TN,under18,2013,1491577 +TN,total,2007,6175727 +TN,under18,2007,1482747 +TN,total,2008,6247411 +TN,under18,2008,1494354 +TN,total,2006,6088766 +TN,under18,2006,1470166 +TN,total,2005,5991057 +TN,under18,2005,1449326 +TN,total,2003,5847812 +TN,under18,2003,1424861 +TN,total,2004,5910809 +TN,under18,2004,1433343 +TN,total,2002,5795918 +TN,under18,2002,1414857 +TN,total,2001,5750789 +TN,under18,2001,1407578 +TN,total,1999,5638706 +TN,under18,1999,1385997 +TN,total,2000,5703719 +TN,under18,2000,1399685 +TN,total,1997,5499233 +TN,under18,1997,1359030 +TN,under18,1998,1369987 +TN,total,1998,5570045 +TN,under18,1996,1345723 +TN,total,1996,5416643 +TN,total,1995,5326936 +TN,under18,1995,1331616 +TN,under18,1994,1310988 +TN,total,1994,5231438 +TN,under18,1992,1259458 +TN,total,1992,5049742 +TN,total,1993,5137584 +TN,under18,1993,1285044 +TN,total,1991,4966587 +TN,under18,1991,1233260 +TN,under18,1990,1220200 +TN,total,1990,4894492 +TX,under18,1990,4906220 +TX,total,1990,17056755 +TX,total,1991,17398005 +TX,under18,1991,5000793 +TX,total,1993,18161612 +TX,under18,1993,5217899 +TX,under18,1992,5109805 +TX,total,1992,17759738 +TX,under18,1994,5331524 +TX,total,1994,18564062 +TX,total,1995,18958751 +TX,under18,1995,5421784 +TX,under18,1996,5551447 +TX,total,1996,19340342 +TX,under18,1998,5759054 +TX,total,1998,20157531 +TX,total,1997,19740317 +TX,under18,1997,5655482 +TX,total,2000,20944499 +TX,under18,2000,5906301 +TX,total,1999,20558220 +TX,under18,1999,5840211 +TX,total,2001,21319622 +TX,under18,2001,5980187 +TX,total,2002,21690325 +TX,under18,2002,6060372 +TX,total,2004,22394023 +TX,under18,2004,6208259 +TX,total,2003,22030931 +TX,under18,2003,6132980 +TX,total,2005,22778123 +TX,under18,2005,6290970 +TX,total,2006,23359580 +TX,under18,2006,6446798 +TX,total,2008,24309039 +TX,under18,2008,6675917 +TX,total,2007,23831983 +TX,under18,2007,6565872 +TX,under18,2013,7041986 +TX,total,2013,26448193 +TX,total,2009,24801761 +TX,under18,2009,6792907 +TX,total,2010,25245178 +TX,under18,2010,6879014 +TX,under18,2011,6931758 +TX,total,2011,25640909 +TX,under18,2012,6985807 +TX,total,2012,26060796 +UT,total,2012,2854871 +UT,under18,2012,888578 +UT,total,2011,2814784 +UT,under18,2011,881350 +UT,under18,2010,873019 +UT,total,2010,2774424 +UT,total,2009,2723421 +UT,under18,2009,857853 +UT,total,2013,2900872 +UT,under18,2013,896589 +UT,total,2007,2597746 +UT,under18,2007,815496 +UT,total,2008,2663029 +UT,under18,2008,837258 +UT,total,2006,2525507 +UT,under18,2006,789957 +UT,total,2005,2457719 +UT,under18,2005,767888 +UT,total,2003,2360137 +UT,under18,2003,740483 +UT,total,2004,2401580 +UT,under18,2004,751771 +UT,total,2002,2324815 +UT,under18,2002,733517 +UT,total,2001,2283715 +UT,under18,2001,726819 +UT,total,1999,2203482 +UT,under18,1999,715398 +UT,total,2000,2244502 +UT,under18,2000,721686 +UT,total,1997,2119784 +UT,under18,1997,699528 +UT,under18,1998,709386 +UT,total,1998,2165961 +UT,under18,1996,687078 +UT,total,1996,2067976 +UT,total,1995,2014179 +UT,under18,1995,679636 +UT,under18,1994,673935 +UT,total,1994,1960446 +UT,under18,1992,648725 +UT,total,1992,1836799 +UT,total,1993,1898404 +UT,under18,1993,662968 +UT,total,1991,1779780 +UT,under18,1991,637216 +UT,under18,1990,627122 +UT,total,1990,1731223 +VT,under18,1990,143296 +VT,total,1990,564798 +VT,total,1991,568606 +VT,under18,1991,145219 +VT,total,1993,577748 +VT,under18,1993,148705 +VT,under18,1992,146983 +VT,total,1992,572751 +VT,under18,1994,150794 +VT,total,1994,583836 +VT,total,1995,589003 +VT,under18,1995,151439 +VT,under18,1996,151490 +VT,total,1996,593701 +VT,under18,1998,148467 +VT,total,1998,600416 +VT,total,1997,597239 +VT,under18,1997,150040 +VT,total,2000,609618 +VT,under18,2000,147549 +VT,total,1999,604683 +VT,under18,1999,147859 +VT,total,2001,612223 +VT,under18,2001,146040 +VT,total,2002,615442 +VT,under18,2002,144441 +VT,total,2004,619920 +VT,under18,2004,141068 +VT,total,2003,617858 +VT,under18,2003,142718 +VT,total,2005,621215 +VT,under18,2005,138933 +VT,total,2006,622892 +VT,under18,2006,136731 +VT,total,2008,624151 +VT,under18,2008,132600 +VT,total,2007,623481 +VT,under18,2007,134695 +VT,under18,2013,122701 +VT,total,2013,626630 +VT,total,2009,624817 +VT,under18,2009,130450 +VT,total,2010,625793 +VT,under18,2010,128601 +VT,under18,2011,126500 +VT,total,2011,626320 +VT,under18,2012,124555 +VT,total,2012,625953 +VA,total,2012,8186628 +VA,under18,2012,1861323 +VA,total,2011,8105850 +VA,under18,2011,1857585 +VA,under18,2010,1855025 +VA,total,2010,8024417 +VA,total,2009,7925937 +VA,under18,2009,1845132 +VA,total,2013,8260405 +VA,under18,2013,1864535 +VA,total,2007,7751000 +VA,under18,2007,1834386 +VA,total,2008,7833496 +VA,under18,2008,1838361 +VA,total,2005,7577105 +VA,under18,2005,1816270 +VA,total,2006,7673725 +VA,under18,2006,1826368 +VA,total,2003,7366977 +VA,under18,2003,1782254 +VA,total,2004,7475575 +VA,under18,2004,1801958 +VA,total,2002,7286873 +VA,under18,2002,1771247 +VA,total,2001,7198362 +VA,under18,2001,1754549 +VA,total,1999,7000174 +VA,under18,1999,1723125 +VA,total,2000,7105817 +VA,under18,2000,1741420 +VA,total,1997,6829183 +VA,under18,1997,1683766 +VA,under18,1998,1706261 +VA,total,1998,6900918 +VA,under18,1996,1664147 +VA,total,1996,6750884 +VA,total,1995,6670693 +VA,under18,1995,1649005 +VA,under18,1994,1628711 +VA,total,1994,6593139 +VA,under18,1992,1581544 +VA,total,1992,6414307 +VA,total,1993,6509630 +VA,under18,1993,1604758 +VA,total,1991,6301217 +VA,under18,1991,1548258 +VA,under18,1990,1520670 +VA,total,1990,6216884 +WA,under18,1990,1301545 +WA,total,1990,4903043 +WA,total,1991,5025624 +WA,under18,1991,1326527 +WA,total,1993,5278842 +WA,under18,1993,1387716 +WA,under18,1992,1365480 +WA,total,1992,5160757 +WA,under18,1994,1409922 +WA,total,1994,5375161 +WA,total,1995,5481027 +WA,under18,1995,1429397 +WA,under18,1996,1449613 +WA,total,1996,5569753 +WA,under18,1998,1494784 +WA,total,1998,5769562 +WA,total,1997,5674747 +WA,under18,1997,1473646 +WA,total,2000,5910512 +WA,under18,2000,1516361 +WA,total,1999,5842564 +WA,under18,1999,1507824 +WA,total,2001,5985722 +WA,under18,2001,1517527 +WA,total,2002,6052349 +WA,under18,2002,1517655 +WA,total,2004,6178645 +WA,under18,2004,1520751 +WA,total,2003,6104115 +WA,under18,2003,1514877 +WA,total,2005,6257305 +WA,under18,2005,1523890 +WA,total,2006,6370753 +WA,under18,2006,1536926 +WA,total,2008,6562231 +WA,under18,2008,1560302 +WA,total,2007,6461587 +WA,under18,2007,1549582 +WA,under18,2013,1595795 +WA,total,2013,6971406 +WA,total,2009,6667426 +WA,under18,2009,1574403 +WA,total,2010,6742256 +WA,under18,2010,1581436 +WA,under18,2011,1584709 +WA,total,2011,6821481 +WA,under18,2012,1588451 +WA,total,2012,6895318 +WV,total,2012,1856680 +WV,under18,2012,384030 +WV,total,2011,1855184 +WV,under18,2011,385283 +WV,under18,2010,387224 +WV,total,2010,1854146 +WV,total,2009,1847775 +WV,under18,2009,389036 +WV,total,2013,1854304 +WV,under18,2013,381678 +WV,total,2007,1834052 +WV,under18,2007,390661 +WV,total,2008,1840310 +WV,under18,2008,390210 +WV,total,2006,1827912 +WV,under18,2006,390637 +WV,total,2005,1820492 +WV,under18,2005,390431 +WV,total,2003,1812295 +WV,under18,2003,392460 +WV,total,2004,1816438 +WV,under18,2004,391856 +WV,total,2002,1805414 +WV,under18,2002,393569 +WV,total,2001,1801481 +WV,under18,2001,395307 +WV,total,1999,1811799 +WV,under18,1999,406784 +WV,total,2000,1807021 +WV,under18,2000,401062 +WV,total,1997,1819113 +WV,under18,1997,418037 +WV,under18,1998,412793 +WV,total,1998,1815609 +WV,under18,1996,422831 +WV,total,1996,1822808 +WV,total,1995,1823700 +WV,under18,1995,428790 +WV,under18,1994,429128 +WV,total,1994,1820421 +WV,under18,1992,433116 +WV,total,1992,1806451 +WV,total,1993,1817539 +WV,under18,1993,432364 +WV,total,1991,1798735 +WV,under18,1991,433918 +WV,under18,1990,436797 +WV,total,1990,1792548 +WI,under18,1990,1302869 +WI,total,1990,4904562 +WI,total,1991,4964343 +WI,under18,1991,1314855 +WI,total,1993,5084889 +WI,under18,1993,1337334 +WI,under18,1992,1330555 +WI,total,1992,5025398 +WI,under18,1994,1348110 +WI,total,1994,5133678 +WI,total,1995,5184836 +WI,under18,1995,1351343 +WI,under18,1996,1352877 +WI,total,1996,5229986 +WI,under18,1998,1362907 +WI,total,1998,5297673 +WI,total,1997,5266213 +WI,under18,1997,1359712 +WI,total,1999,5332666 +WI,under18,1999,1367019 +WI,total,2000,5373999 +WI,under18,2000,1370440 +WI,total,2001,5406835 +WI,under18,2001,1367593 +WI,total,2002,5445162 +WI,under18,2002,1365315 +WI,total,2004,5514026 +WI,under18,2004,1354643 +WI,total,2003,5479203 +WI,under18,2003,1358505 +WI,total,2005,5546166 +WI,under18,2005,1349866 +WI,total,2006,5577655 +WI,under18,2006,1348785 +WI,total,2008,5640996 +WI,under18,2008,1345573 +WI,total,2007,5610775 +WI,under18,2007,1348901 +WI,under18,2013,1307776 +WI,total,2013,5742713 +WI,total,2009,5669264 +WI,under18,2009,1342411 +WI,total,2010,5689060 +WI,under18,2010,1336094 +WI,under18,2011,1325870 +WI,total,2011,5708785 +WI,under18,2012,1316113 +WI,total,2012,5724554 +WY,total,2012,576626 +WY,under18,2012,136526 +WY,total,2011,567329 +WY,under18,2011,135407 +WY,under18,2010,135351 +WY,total,2010,564222 +WY,total,2009,559851 +WY,under18,2009,134960 +WY,total,2013,582658 +WY,under18,2013,137679 +WY,total,2007,534876 +WY,under18,2007,128760 +WY,total,2008,546043 +WY,under18,2008,131511 +WY,total,2006,522667 +WY,under18,2006,125525 +WY,total,2005,514157 +WY,under18,2005,124022 +WY,total,2003,503453 +WY,under18,2003,124182 +WY,total,2004,509106 +WY,under18,2004,123974 +WY,total,2002,500017 +WY,under18,2002,125495 +WY,total,2001,494657 +WY,under18,2001,126212 +WY,total,2000,494300 +WY,under18,2000,128774 +WY,total,1999,491780 +WY,under18,1999,130793 +WY,total,1997,489452 +WY,under18,1997,134328 +WY,under18,1998,132602 +WY,total,1998,490787 +WY,under18,1996,135698 +WY,total,1996,488167 +WY,total,1995,485160 +WY,under18,1995,136785 +WY,under18,1994,137733 +WY,total,1994,480283 +WY,under18,1992,137308 +WY,total,1992,466251 +WY,total,1993,473081 +WY,under18,1993,137458 +WY,total,1991,459260 +WY,under18,1991,136720 +WY,under18,1990,136078 +WY,total,1990,453690 +PR,under18,1990,NaN +PR,total,1990,NaN +PR,total,1991,NaN +PR,under18,1991,NaN +PR,total,1993,NaN +PR,under18,1993,NaN +PR,under18,1992,NaN +PR,total,1992,NaN +PR,under18,1994,NaN +PR,total,1994,NaN +PR,total,1995,NaN +PR,under18,1995,NaN +PR,under18,1996,NaN +PR,total,1996,NaN +PR,under18,1998,NaN +PR,total,1998,NaN +PR,total,1997,NaN +PR,under18,1997,NaN +PR,total,1999,NaN +PR,under18,1999,NaN +PR,total,2000,3810605 +PR,under18,2000,1089063 +PR,total,2001,3818774 +PR,under18,2001,1077566 +PR,total,2002,3823701 +PR,under18,2002,1065051 +PR,total,2004,3826878 +PR,under18,2004,1035919 +PR,total,2003,3826095 +PR,under18,2003,1050615 +PR,total,2005,3821362 +PR,under18,2005,1019447 +PR,total,2006,3805214 +PR,under18,2006,998543 +PR,total,2007,3782995 +PR,under18,2007,973613 +PR,total,2008,3760866 +PR,under18,2008,945705 +PR,under18,2013,814068 +PR,total,2013,3615086 +PR,total,2009,3740410 +PR,under18,2009,920794 +PR,total,2010,3721208 +PR,under18,2010,896945 +PR,under18,2011,869327 +PR,total,2011,3686580 +PR,under18,2012,841740 +PR,total,2012,3651545 +USA,under18,1990,64218512 +USA,total,1990,249622814 +USA,total,1991,252980942 +USA,under18,1991,65313018 +USA,under18,1992,66509177 +USA,total,1992,256514231 +USA,total,1993,259918595 +USA,under18,1993,67594938 +USA,under18,1994,68640936 +USA,total,1994,263125826 +USA,under18,1995,69473140 +USA,under18,1996,70233512 +USA,total,1995,266278403 +USA,total,1996,269394291 +USA,total,1997,272646932 +USA,under18,1997,70920738 +USA,under18,1998,71431406 +USA,total,1998,275854116 +USA,under18,1999,71946051 +USA,total,2000,282162411 +USA,under18,2000,72376189 +USA,total,1999,279040181 +USA,total,2001,284968955 +USA,under18,2001,72671175 +USA,total,2002,287625193 +USA,under18,2002,72936457 +USA,total,2003,290107933 +USA,under18,2003,73100758 +USA,total,2004,292805298 +USA,under18,2004,73297735 +USA,total,2005,295516599 +USA,under18,2005,73523669 +USA,total,2006,298379912 +USA,under18,2006,73757714 +USA,total,2007,301231207 +USA,under18,2007,74019405 +USA,total,2008,304093966 +USA,under18,2008,74104602 +USA,under18,2013,73585872 +USA,total,2013,316128839 +USA,total,2009,306771529 +USA,under18,2009,74134167 +USA,under18,2010,74119556 +USA,total,2010,309326295 +USA,under18,2011,73902222 +USA,total,2011,311582564 +USA,under18,2012,73708179 +USA,total,2012,313873685 -- GitLab