{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "\n", "import pandas as pd\n", "from pandas import Series,DataFrame" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 上一讲,append,concat数据集成方法\n", "# merge融合,根据某一共同属性进行级联,高级用法" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namesexid
0A1
1B2
2C3
3D4
4E5
5F6
\n", "
" ], "text/plain": [ " name sex id\n", "0 A 男 1\n", "1 B 女 2\n", "2 C 女 3\n", "3 D 女 4\n", "4 E 男 5\n", "5 F 男 6" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1 = DataFrame({'name':['A','B','C','D','E','F'],\n", " 'sex':['男','女','女','女','男','男'],\n", " 'id':[1,2,3,4,5,6]})\n", "df1" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
agesalaryid
022120001
125150002
227200003
321300004
418100005
52980007
\n", "
" ], "text/plain": [ " age salary id\n", "0 22 12000 1\n", "1 25 15000 2\n", "2 27 20000 3\n", "3 21 30000 4\n", "4 18 10000 5\n", "5 29 8000 7" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2 = DataFrame({'age':[22,25,27,21,18,29],'salary':[12000,15000,20000,30000,10000,8000],'id':[1,2,3,4,5,7]})\n", "df2" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "d:\\python36\\lib\\site-packages\\pandas\\core\\frame.py:6692: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n", "of pandas will change to not sort by default.\n", "\n", "To accept the future behavior, pass 'sort=False'.\n", "\n", "To retain the current behavior and silence the warning, pass 'sort=True'.\n", "\n", " sort=sort)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ageidnamesalarysex
0NaN1ANaN
1NaN2BNaN
2NaN3CNaN
3NaN4DNaN
4NaN5ENaN
5NaN6FNaN
022.01NaN12000.0NaN
125.02NaN15000.0NaN
227.03NaN20000.0NaN
321.04NaN30000.0NaN
418.05NaN10000.0NaN
529.07NaN8000.0NaN
\n", "
" ], "text/plain": [ " age id name salary sex\n", "0 NaN 1 A NaN 男\n", "1 NaN 2 B NaN 女\n", "2 NaN 3 C NaN 女\n", "3 NaN 4 D NaN 女\n", "4 NaN 5 E NaN 男\n", "5 NaN 6 F NaN 男\n", "0 22.0 1 NaN 12000.0 NaN\n", "1 25.0 2 NaN 15000.0 NaN\n", "2 27.0 3 NaN 20000.0 NaN\n", "3 21.0 4 NaN 30000.0 NaN\n", "4 18.0 5 NaN 10000.0 NaN\n", "5 29.0 7 NaN 8000.0 NaN" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.append(df2)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namesexidagesalaryid
0A122120001
1B225150002
2C327200003
3D421300004
4E518100005
5F62980007
\n", "
" ], "text/plain": [ " name sex id age salary id\n", "0 A 男 1 22 12000 1\n", "1 B 女 2 25 15000 2\n", "2 C 女 3 27 20000 3\n", "3 D 女 4 21 30000 4\n", "4 E 男 5 18 10000 5\n", "5 F 男 6 29 8000 7" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat([df1,df2],axis = 1)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namesexidagesalary
0A12212000
1B22515000
2C32720000
3D42130000
4E51810000
\n", "
" ], "text/plain": [ " name sex id age salary\n", "0 A 男 1 22 12000\n", "1 B 女 2 25 15000\n", "2 C 女 3 27 20000\n", "3 D 女 4 21 30000\n", "4 E 男 5 18 10000" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.merge(df2)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namesexidagesalary
0A122.012000.0
1B225.015000.0
2C327.020000.0
3D421.030000.0
4E518.010000.0
5F6NaNNaN
6NaNNaN729.08000.0
\n", "
" ], "text/plain": [ " name sex id age salary\n", "0 A 男 1 22.0 12000.0\n", "1 B 女 2 25.0 15000.0\n", "2 C 女 3 27.0 20000.0\n", "3 D 女 4 21.0 30000.0\n", "4 E 男 5 18.0 10000.0\n", "5 F 男 6 NaN NaN\n", "6 NaN NaN 7 29.0 8000.0" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.merge(df2,how = 'outer')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PythonMathEn
A401590
B595283
C14138137
D897853
E811013
F757986
\n", "
" ], "text/plain": [ " Python Math En\n", "A 40 15 90\n", "B 59 52 83\n", "C 14 138 137\n", "D 89 78 53\n", "E 81 101 3\n", "F 75 79 86" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = DataFrame(np.random.randint(0,150,size = (6,3)),index = list('ABCDEF'),columns=['Python','Math','En'])\n", "df" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "Python 59.7\n", "Math 77.2\n", "En 75.3\n", "dtype: float64" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s = df.mean().round(1)\n", "s" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
score_mean
Python59.7
Math77.2
En75.3
\n", "
" ], "text/plain": [ " score_mean\n", "Python 59.7\n", "Math 77.2\n", "En 75.3" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2 = DataFrame(s)\n", "df2.columns = ['score_mean']\n", "df2" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PythonMathEn
score_mean59.777.275.3
\n", "
" ], "text/plain": [ " Python Math En\n", "score_mean 59.7 77.2 75.3" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3 = df2.T\n", "df3" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PythonMathEn
A40.015.090.0
B59.052.083.0
C14.0138.0137.0
D89.078.053.0
E81.0101.03.0
F75.079.086.0
score_mean59.777.275.3
\n", "
" ], "text/plain": [ " Python Math En\n", "A 40.0 15.0 90.0\n", "B 59.0 52.0 83.0\n", "C 14.0 138.0 137.0\n", "D 89.0 78.0 53.0\n", "E 81.0 101.0 3.0\n", "F 75.0 79.0 86.0\n", "score_mean 59.7 77.2 75.3" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df4 = df.append(df3)\n", "df4" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
score_mean
A48.3
B64.7
C96.3
D73.3
E61.7
F80.0
score_mean70.7
\n", "
" ], "text/plain": [ " score_mean\n", "A 48.3\n", "B 64.7\n", "C 96.3\n", "D 73.3\n", "E 61.7\n", "F 80.0\n", "score_mean 70.7" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df5 = DataFrame(df4.mean(axis = 1).round(1))\n", "df5.columns = ['score_mean']\n", "df5" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PythonMathEnscore_mean
A40.015.090.048.3
B59.052.083.064.7
C14.0138.0137.096.3
D89.078.053.073.3
E81.0101.03.061.7
F75.079.086.080.0
score_mean59.777.275.370.7
\n", "
" ], "text/plain": [ " Python Math En score_mean\n", "A 40.0 15.0 90.0 48.3\n", "B 59.0 52.0 83.0 64.7\n", "C 14.0 138.0 137.0 96.3\n", "D 89.0 78.0 53.0 73.3\n", "E 81.0 101.0 3.0 61.7\n", "F 75.0 79.0 86.0 80.0\n", "score_mean 59.7 77.2 75.3 70.7" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df4.merge(df5,left_index=True,right_index=True)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.7" } }, "nbformat": 4, "nbformat_minor": 2 }