{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "\n", "import pandas as pd\n", "# 数据分析BI-------->人工智能AI\n", "# 数据分析和数据挖掘一个意思,\n", "# 工具和软件:Excel 免费版\n", "# SPSS(一人一年10000)、SAS(一人一年5000)、Matlab 收费\n", "# R、Python(全方位语言,流行) 免费\n", "# Python + numpy + scipy + pandas + matplotlib + seaborn + pyEcharts + sklearn + kereas(Tensorflow)+…… \n", "# 代码,自动化(数据输入----输出结果)\n", "from pandas import Series,DataFrame" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/plain": [ "a 63\n", "b 107\n", "c 16\n", "d 35\n", "e 140\n", "f 83\n", "dtype: int32" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 多层索引,行列\n", "# 单层索引\n", "s = Series(np.random.randint(0,150,size = 6),index=list('abcdef'))\n", "s" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "张三 期中 114\n", " 期末 131\n", "李四 期中 3\n", " 期末 63\n", "王五 期中 107\n", " 期末 34\n", "dtype: int32" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 多层索引,两层,三层以上(规则一样)\n", "s2 = Series(np.random.randint(0,150,size = 6),index = pd.MultiIndex.from_product([['张三','李四','王五'],['期中','期末']]))\n", "s2" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "scrolled": true }, "outputs": [ { "ename": "NameError", "evalue": "name 'DataFrame' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m150\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0msize\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Python'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'En'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'Math'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mMultiIndex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_product\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'张三'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'李四'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'王五'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'期中'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'期末'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mNameError\u001b[0m: name 'DataFrame' is not defined" ] } ], "source": [ "df = DataFrame(np.random.randint(0,150,size = (6,3)),columns=['Python','En','Math'],index =pd.MultiIndex.from_product([['张三','李四','王五'],['期中','期末']]) )\n", "\n", "df" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PythonEnMath
张三期中A153117
B8256123
期末A14278
B695017
李四期中A9187143
B12011839
期末A567655
B11105121
王五期中A147781
B128126146
期末A4945114
B1212677
\n", "
" ], "text/plain": [ " Python En Math\n", "张三 期中 A 15 31 17\n", " B 82 56 123\n", " 期末 A 14 2 78\n", " B 69 50 17\n", "李四 期中 A 91 87 143\n", " B 120 118 39\n", " 期末 A 56 76 55\n", " B 11 105 121\n", "王五 期中 A 147 78 1\n", " B 128 126 146\n", " 期末 A 49 45 114\n", " B 121 26 77" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 三层索引\n", "df3 = DataFrame(np.random.randint(0,150,size = (12,3)),columns=['Python','En','Math'],index =pd.MultiIndex.from_product([['张三','李四','王五'],['期中','期末'],['A','B']]) )\n", "\n", "df3" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "73" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 先获取列后获取行\n", "df['Python']['张三']['期中']" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "df2 = df.copy()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PythonEnMath
张三期中73525
期末373656
李四期中14981142
期末711380
王五期中1194103
期末2512183
\n", "
" ], "text/plain": [ " Python En Math\n", "张三 期中 73 5 25\n", " 期末 37 36 56\n", "李四 期中 149 81 142\n", " 期末 71 138 0\n", "王五 期中 11 94 103\n", " 期末 25 121 83" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2.sort_index()" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "73" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 先获取行,后获取列\n", "df.loc['张三'].loc['期中']['Python']" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PythonEnMath
张三期中73525
期末373656
\n", "
" ], "text/plain": [ " Python En Math\n", "张三 期中 73 5 25\n", " 期末 37 36 56" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iloc[[0,1]]" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.7" } }, "nbformat": 4, "nbformat_minor": 2 }