From a6c94b4e4139ff59f2cb7855a460e408762675b9 Mon Sep 17 00:00:00 2001
From: wizardforcel <562826179@qq.com>
Date: Wed, 9 Jan 2019 17:03:09 +0800
Subject: [PATCH] 7.1-7.5

---
 docs/7.2.md | 265 ++++++++++++++++++++++++++++
 docs/7.3.md |  58 ++++++
 docs/7.4.md | 498 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 docs/7.5.md | 437 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 1258 insertions(+)
 create mode 100644 docs/7.2.md
 create mode 100644 docs/7.3.md
 create mode 100644 docs/7.4.md
 create mode 100644 docs/7.5.md

diff --git a/docs/7.2.md b/docs/7.2.md
new file mode 100644
index 0000000..e8475b1
--- /dev/null
+++ b/docs/7.2.md
@@ -0,0 +1,265 @@
+# 7.2 数据整理
+
+> 原文：[Data Wrangling](https://github.com/donnemartin/viz/blob/master/githubstats/data_wrangling.ipynb)
+> 
+> 译者：[飞龙](https://github.com/wizardforcel)
+> 
+> 协议：[CC BY-NC-SA 4.0](http://creativecommons.org/licenses/by-nc-sa/4.0/)（原文协议：[Apache License 2.0](https://github.com/donnemartin/data-science-ipython-notebooks/blob/master/LICENSE)）
+
+## 数据流
+
+![Imgur](../img/7-2-1.png)
+
+直接从 GitHub 挖掘数据，`Viz`由 [GitHub API](https://developer.github.com/v3/) 提供支持，并利用以下内容：
+* 通过 Python 使用[`github3.py`](https://github.com/sigmavirus24/github3.py) 访问 GitHub API。
+* 将下面的 [IPython 笔记本](https://github.com/donnemartin/viz/blob/master/githubstats/data_wrangling.ipynb) 中的[`pandas`](https://github.com/pydata/pandas)用于数据整理。
+* 通过[`geocoder`](https://github.com/DenisCarriere/geocoder)使用 [Google 地图 API](https://developers.google.com/maps/?hl=en) 访问位置数据。
+* 将 [Tableau Public](https://public.tableau.com/s/) 用于可视化。
+
+将来，[Google BigQuery](https://cloud.google.com/bigquery/) 和 [GitHub Archive](https://www.githubarchive.org/) 也可以补充 GitHub API。
+
+## 导入
+
+```py
+import re
+
+import pandas as pd
+```
+
+## 准备仓库数据
+
+加载仓库数据并删除重复：
+
+```py
+repos = pd.read_csv("data/2017/repos-dump.csv", quotechar='"', skipinitialspace=True)
+print('Shape before dropping duplicates', repos.shape)
+repos = repos.drop_duplicates(subset='full_name', keep='last')
+print('Shape after  dropping duplicates', repos.shape)
+repos.head()
+
+'''
+Shape before dropping duplicates (8697, 5)
+Shape after  dropping duplicates (8697, 5)
+'''
+```
+
+|  | full_name | stars | forks | description | language |
+| --- | --- | --- | --- | --- | --- |
+| 0 | thedaviddias/Front-End-Checklist | 24267 | 2058 | 🗂 The perfect Front-End Checklist for modern w... | JavaScript |
+| 1 | GoogleChrome/puppeteer | 21976 | 1259 | Headless Chrome Node API | JavaScript |
+| 2 | parcel-bundler/parcel | 13981 | 463 | 📦🚀 Blazing fast, zero configuration web applic... | JavaScript |
+| 3 | Chalarangelo/30-seconds-of-code | 13466 | 1185 | Curated collection of useful Javascript snippe... | JavaScript |
+| 4 | wearehive/project-guidelines | 11279 | 970 | A set of best practices for JavaScript projects | JavaScript |
+
+将`user`和`repo`从`full_name`分离，变成新列：
+
+```py
+def extract_user(line):
+    return line.split('/')[0]
+
+def extract_repo(line):
+    return line.split('/')[1]
+
+repos['user'] = repos['full_name'].str[:].apply(extract_user)
+repos['repo'] = repos['full_name'].str[:].apply(extract_repo)
+print(repos.shape)
+repos.head()
+
+# (8697, 7)
+```
+
+|  | full_name | stars | forks | description | language | user | repo |
+| --- | --- | --- | --- | --- | --- | --- | --- |
+| 0 | thedaviddias/Front-End-Checklist | 24267 | 2058 | 🗂 The perfect Front-End Checklist for modern w... | JavaScript | thedaviddias | Front-End-Checklist |
+| 1 | GoogleChrome/puppeteer | 21976 | 1259 | Headless Chrome Node API | JavaScript | GoogleChrome | puppeteer |
+| 2 | parcel-bundler/parcel | 13981 | 463 | 📦🚀 Blazing fast, zero configuration web applic... | JavaScript | parcel-bundler | parcel |
+| 3 | Chalarangelo/30-seconds-of-code | 13466 | 1185 | Curated collection of useful Javascript snippe... | JavaScript | Chalarangelo | 30-seconds-of-code |
+| 4 | wearehive/project-guidelines | 11279 | 970 | A set of best practices for JavaScript projects | JavaScript | wearehive | project-guidelines |
+
+## 准备用户数据
+
+加载用户数据并删除重复：
+
+```py
+users = pd.read_csv("data/2017/user-geocodes-dump.csv", quotechar='"', skipinitialspace=True)
+print('Shape before dropping duplicates', users.shape)
+users = users.drop_duplicates(subset='id', keep='last')
+print('Shape after  dropping duplicates', users.shape)
+users.head()
+
+'''
+Shape before dropping duplicates (6426, 8)
+Shape after  dropping duplicates (6426, 8)
+'''
+```
+
+|  | id | name | type | location | lat | long | city | country |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| 0 | dns-violations | NaN | Organization | NaN | NaN | NaN | NaN | NaN |
+| 1 | hannob | Hanno Böck | User | Berlin | 52.520007 | 13.404954 | Berlin | Germany |
+| 2 | takecian | Takeshi Fujiki | User | Tokyo, Japan | 35.689487 | 139.691706 | Tokyo | Japan |
+| 3 | jtomschroeder | Tom Schroeder | User | Chicago | 41.878114 | -87.629798 | Chicago | United States |
+| 4 | wapiflapi | Wannes Rombouts | User | France | 46.227638 | 2.213749 | NaN | France |
+
+将`id`列重命名为`user`：
+
+```py
+users.rename(columns={'id': 'user'}, inplace=True)
+users.head()
+```
+
+|  | user | name | type | location | lat | long | city | country |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| 0 | dns-violations | NaN | Organization | NaN | NaN | NaN | NaN | NaN |
+| 1 | hannob | Hanno Böck | User | Berlin | 52.520007 | 13.404954 | Berlin | Germany |
+| 2 | takecian | Takeshi Fujiki | User | Tokyo, Japan | 35.689487 | 139.691706 | Tokyo | Japan |
+| 3 | jtomschroeder | Tom Schroeder | User | Chicago | 41.878114 | -87.629798 | Chicago | United States |
+| 4 | wapiflapi | Wannes Rombouts | User | France | 46.227638 | 2.213749 | NaN | France |
+
+## 合并仓库和用户数据
+
+左连接仓库和用户：
+
+```py
+repos_users = pd.merge(repos, users, on='user', how='left')
+print('Shape repos:', repos.shape)
+print('Shape users:', users.shape)
+print('Shape repos_users:', repos_users.shape)
+repos_users.head()
+
+'''
+Shape repos: (8697, 7)
+Shape users: (6426, 8)
+Shape repos_users: (8697, 14)
+'''
+```
+
+|  | full_name | stars | forks | description | language | user | repo | name | type | location | lat | long | city | country |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| 0 | thedaviddias/Front-End-Checklist | 24267 | 2058 | 🗂 The perfect Front-End Checklist for modern w... | JavaScript | thedaviddias | Front-End-Checklist | David Dias | User | France, Mauritius, Canada | NaN | NaN | NaN | NaN |
+| 1 | GoogleChrome/puppeteer | 21976 | 1259 | Headless Chrome Node API | JavaScript | GoogleChrome | puppeteer | NaN | Organization | NaN | NaN | NaN | NaN | NaN |
+| 2 | parcel-bundler/parcel | 13981 | 463 | 📦🚀 Blazing fast, zero configuration web applic... | JavaScript | parcel-bundler | parcel | Parcel | Organization | NaN | NaN | NaN | NaN | NaN |
+| 3 | Chalarangelo/30-seconds-of-code | 13466 | 1185 | Curated collection of useful Javascript snippe... | JavaScript | Chalarangelo | 30-seconds-of-code | Angelos Chalaris | User | Athens, Greece | 37.983810 | 23.727539 | Athens | Greece |
+| 4 | wearehive/project-guidelines | 11279 | 970 | A set of best practices for JavaScript projects | JavaScript | wearehive | project-guidelines | Hive | Organization | London | 51.507351 | -0.127758 | London | United Kingdom |
+
+## 整理仓库和用户数据
+
+重新排序列：
+
+```py
+repos_users = repos_users.reindex_axis(['full_name',
+                                        'repo',
+                                        'description',
+                                        'stars',
+                                        'forks',
+                                        'language',
+                                        'user',
+                                        'name',
+                                        'type',
+                                        'location',
+                                        'lat',
+                                        'long',
+                                        'city',
+                                        'country'], axis=1)
+print(repos_users.shape)
+repos_users.head()
+
+# (8697, 14)
+```
+
+|  | full_name | repo | description | stars | forks | language | user | name | type | location | lat | long | city | country |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| 0 | thedaviddias/Front-End-Checklist | Front-End-Checklist | 🗂 The perfect Front-End Checklist for modern w... | 24267 | 2058 | JavaScript | thedaviddias | David Dias | User | France, Mauritius, Canada | NaN | NaN | NaN | NaN |
+| 1 | GoogleChrome/puppeteer | puppeteer | Headless Chrome Node API | 21976 | 1259 | JavaScript | GoogleChrome | NaN | Organization | NaN | NaN | NaN | NaN | NaN |
+| 2 | parcel-bundler/parcel | parcel | 📦🚀 Blazing fast, zero configuration web applic... | 13981 | 463 | JavaScript | parcel-bundler | Parcel | Organization | NaN | NaN | NaN | NaN | NaN |
+| 3 | Chalarangelo/30-seconds-of-code | 30-seconds-of-code | Curated collection of useful Javascript snippe... | 13466 | 1185 | JavaScript | Chalarangelo | Angelos Chalaris | User | Athens, Greece | 37.983810 | 23.727539 | Athens | Greece |
+| 4 | wearehive/project-guidelines | project-guidelines | A set of best practices for JavaScript projects | 11279 | 970 | JavaScript | wearehive | Hive | Organization | London | 51.507351 | -0.127758 | London | United Kingdom |
+
+## 添加整体排名
+
+根据星数对每个元素排名：
+
+```py
+repos_users['rank'] = repos_users['stars'].rank(ascending=False)
+print(repos_users.shape)
+repos_users.head()
+
+# (8697, 15)
+```
+
+|  | full_name | repo | description | stars | forks | language | user | name | type | location | lat | long | city | country | rank |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| 0 | thedaviddias/Front-End-Checklist | Front-End-Checklist | 🗂 The perfect Front-End Checklist for modern w... | 24267 | 2058 | JavaScript | thedaviddias | David Dias | User | France, Mauritius, Canada | NaN | NaN | NaN | NaN | 3 |
+| 1 | GoogleChrome/puppeteer | puppeteer | Headless Chrome Node API | 21976 | 1259 | JavaScript | GoogleChrome | NaN | Organization | NaN | NaN | NaN | NaN | NaN | 4 |
+| 2 | parcel-bundler/parcel | parcel | 📦🚀 Blazing fast, zero configuration web applic... | 13981 | 463 | JavaScript | parcel-bundler | Parcel | Organization | NaN | NaN | NaN | NaN | NaN | 11 |
+| 3 | Chalarangelo/30-seconds-of-code | 30-seconds-of-code | Curated collection of useful Javascript snippe... | 13466 | 1185 | JavaScript | Chalarangelo | Angelos Chalaris | User | Athens, Greece | 37.983810 | 23.727539 | Athens | Greece | 13 |
+| 4 | wearehive/project-guidelines | project-guidelines | A set of best practices for JavaScript projects | 11279 | 970 | JavaScript | wearehive | Hive | Organization | London | 51.507351 | -0.127758 | London | United Kingdom | 16 |
+
+## 验证结果：用户
+
+等价于[ GitHub 搜索查询](https://github.com/search?utf8=%E2%9C%93&q=created%3A2017-01-01..2017-12-31+stars%3A%3E%3D100+user%3Adonnemartin&type=Repositories&ref=searchresults)：`created:2017-01-01..2017-12-31 stars:>=100 user:donnemartin`
+
+注意：数据可能稍微差了一些，因为搜索查询将考虑执行查询时的数据。 此笔记本中的数据于 2017 年 1 月 1 日采集，来“冻结” 2017 年的结果。从 2017 年 1 月 1 日开始，执行搜索的时间越长，差异越大。
+
+```py
+repos_users[repos_users['user'] == 'donnemartin']
+```
+
+|  | full_name | repo | description | stars | forks | language | user | name | type | location | lat | long | city | country | rank |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| 3308 | donnemartin/system-design-primer | system-design-primer | Learn how to design large-scale systems. Prep ... | 21780 | 2633 | Python | donnemartin | Donne Martin | User | Washington, D.C. | 38.907192 | -77.036871 | Washington | United States | 5 |
+
+## 验证结果：Python 仓库
+
+等价于[ GitHub 搜索查询](https://github.com/search?utf8=%E2%9C%93&q=created%3A2017-01-01..2017-12-31+stars%3A%3E%3D100+language%3Apython&type=Repositories&ref=searchresults)：`created:2017-01-01..2017-12-31 stars:>=100 language:python`
+
+注意：数据可能稍微差了一些，因为搜索查询将考虑执行查询时的数据。 此笔记本中的数据于 2017 年 1 月 1 日采集，来“冻结” 2017 年的结果。从 2017 年 1 月 1 日开始，执行搜索的时间越长，差异越大。
+
+```py
+print(repos_users[repos_users['language'] == 'Python'].shape)
+repos_users[repos_users['language'] == 'Python'].head()
+
+# (1357, 15)
+```
+
+|  | full_name | repo | description | stars | forks | language | user | name | type | location | lat | long | city | country | rank |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| 3308 | donnemartin/system-design-primer | system-design-primer | Learn how to design large-scale systems. Prep ... | 21780 | 2633 | Python | donnemartin | Donne Martin | User | Washington, D.C. | 38.907192 | -77.036871 | Washington | United States | 5 |
+| 3309 | python/cpython | cpython | The Python programming language | 15060 | 3779 | Python | python | Python | Organization | NaN | NaN | NaN | NaN | NaN | 9 |
+| 3310 | ageitgey/face_recognition | face_recognition | The world's simplest facial recognition api fo... | 8487 | 1691 | Python | ageitgey | Adam Geitgey | User | Various places | NaN | NaN | NaN | NaN | 31 |
+| 3311 | tonybeltramelli/pix2code | pix2code | pix2code: Generating Code from a Graphical Use... | 8037 | 605 | Python | tonybeltramelli | Tony Beltramelli | User | Denmark | NaN | NaN | NaN | NaN | 34 |
+| 3312 | google/python-fire | python-fire | Python Fire is a library for automatically gen... | 7663 | 386 | Python | google | Google | Organization | NaN | NaN | NaN | NaN | NaN | 36 |
+
+## 验证结果：所有仓库
+
+等价于[ GitHub 搜索查询](https://github.com/search?utf8=%E2%9C%93&q=created%3A2017-01-01..2017-12-31+stars%3A%3E%3D100&type=Repositories&ref=searchresults)：`created:2017-01-01..2017-12-31 stars:>=100`
+
+注意：数据可能稍微差了一些，因为搜索查询将考虑执行查询时的数据。 此笔记本中的数据于 2017 年 1 月 1 日采集，来“冻结” 2017 年的结果。从 2017 年 1 月 1 日开始，执行搜索的时间越长，差异越大。
+
+```py
+print(repos_users.shape)
+repos_users.head()
+
+# (8697, 15)
+```
+
+|  | full_name | repo | description | stars | forks | language | user | name | type | location | lat | long | city | country | rank |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| 0 | thedaviddias/Front-End-Checklist | Front-End-Checklist | 🗂 The perfect Front-End Checklist for modern w... | 24267 | 2058 | JavaScript | thedaviddias | David Dias | User | France, Mauritius, Canada | NaN | NaN | NaN | NaN | 3 |
+| 1 | GoogleChrome/puppeteer | puppeteer | Headless Chrome Node API | 21976 | 1259 | JavaScript | GoogleChrome | NaN | Organization | NaN | NaN | NaN | NaN | NaN | 4 |
+| 2 | parcel-bundler/parcel | parcel | 📦🚀 Blazing fast, zero configuration web applic... | 13981 | 463 | JavaScript | parcel-bundler | Parcel | Organization | NaN | NaN | NaN | NaN | NaN | 11 |
+| 3 | Chalarangelo/30-seconds-of-code | 30-seconds-of-code | Curated collection of useful Javascript snippe... | 13466 | 1185 | JavaScript | Chalarangelo | Angelos Chalaris | User | Athens, Greece | 37.983810 | 23.727539 | Athens | Greece | 13 |
+| 4 | wearehive/project-guidelines | project-guidelines | A set of best practices for JavaScript projects | 11279 | 970 | JavaScript | wearehive | Hive | Organization | London | 51.507351 | -0.127758 | London | United Kingdom | 16 |
+
+## 输出结果
+
+将结果写出到 csv 来在 Tableau 中可视化：
+
+```py
+users.to_csv('data/2017/users.csv', index=False)
+repos_users.to_csv('data/2017/repos-users-geocodes.csv', index=False)
+repos_users.to_csv('data/2017/repos-users.csv', index=False)
+
+repos_rank = repos_users.reindex_axis(['full_name', 'rank'], axis=1)
+repos_rank.to_csv('data/2017/repos-ranks.csv', index=False)
+```
diff --git a/docs/7.3.md b/docs/7.3.md
new file mode 100644
index 0000000..abad32d
--- /dev/null
+++ b/docs/7.3.md
@@ -0,0 +1,58 @@
+# 7.3 Pandas 数据操作
+
+> 原文：[Data Manipulation with Pandas](http://nbviewer.jupyter.org/github/donnemartin/data-science-ipython-notebooks/blob/master/pandas/03.00-Introduction-to-Pandas.ipynb)
+> 
+> 译者：[飞龙](https://github.com/wizardforcel)
+> 
+> 协议：[CC BY-NC-SA 4.0](http://creativecommons.org/licenses/by-nc-sa/4.0/)
+> 
+> 本节是[《Python 数据科学手册》](https://github.com/jakevdp/PythonDataScienceHandbook)（Python Data Science Handbook）的摘录。
+
+在前一章中，我们详细介绍了 NumPy 及其``ndarray``对象，它在 Python 中提供了密集类型数组的高效存储和操作。在这里，通过详细了解 Pandas 库提供的数据结构，我们将构建这些知识。
+
+Pandas 是一个基于 NumPy 构建的新软件包，它提供了高效的``DataFrame``实现。``DataFrame``本质上是多维数组，带有附加的行和列标签，通常具有异构类型和/或缺失数据。除了为标记数据提供方便的存储接口外，Pandas 还实现了许多强大数据操作，数据库框架和电子表格程序用户都熟悉它们。
+
+正如我们所看到的，NumPy 的``ndarray``数据结构为干净，组织良好的数据类型提供了必要的功能，它们通常出现在数值计算任务中。虽然它很好地服务于此目的，但当我们需要更多的灵活性（例如，将标签附加到数据，处理缺失数据等），以及尝试一些操作，它们不能很好地映射到逐元素广播时（例如， 分组，透视等），它的局限性就很明显了。每一项都是分析非结构化数据的重要部分，它以许多形式存在于我们周围的世界中。
+
+Pandas，特别是它的``Series``和``DataFrame``对象，建立在 NumPy 数组结构之上，可以高效访问这些占据数据科学家许多时间的“数据整理”任务。
+
+在本章中，我们将重点介绍有效使用``Series``，``DataFrame``和相关结构的机制。我们将在适当的地方使用从真实数据集中提取的示例，但这些示例不一定是重点。
+
+## 安装和使用 Pandas
+
+在系统上安装 Pandas 需要安装 NumPy，如果从源代码构建库，则需要使用适当的工具，来编译 C 和 Cython 源，Pandas 构建在它上面。安装的详细信息，请参见[ Pandas 文档](http://pandas.pydata.org/)。如果你遵循了“前言”中所述的建议，并使用 Anaconda 栈，则你已经安装了 Pandas。
+
+安装 Pandas 后，你可以导入它并检查版本：
+
+```py
+import pandas
+pandas.__version__
+
+# '0.18.1'
+```
+
+正如我们通常在别名``np``下导入 NumPy 一样，我们将在别名``pd``下导入 Pandas：
+
+```py
+import pandas as pd
+```
+
+此导入约定将在本书的其余部分中使用。
+
+## 关于内置文档的提示
+
+在阅读本章时，不要忘记 IPython 使你能够快速浏览包的内容（通过使用制表符补全功能）以及各种函数的文档（使用``?`` 字符）。（如果你需要回顾这个，请参阅“IPython 中的帮助和文档”。）
+
+例如，要显示 pandas 命名空间的所有内容，可以键入：
+
+```ipython
+In [3]: pd.<TAB>
+```
+
+要显示 Pandas 的内置文档，你可以使用：
+
+```ipython
+In [4]: pd?
+```
+
+可以在 <http://pandas.pydata.org/> 找到更详细的文档以及教程和其他资源。
diff --git a/docs/7.4.md b/docs/7.4.md
new file mode 100644
index 0000000..4cb9dc3
--- /dev/null
+++ b/docs/7.4.md
@@ -0,0 +1,498 @@
+# 7.4 Pandas 对象介绍
+
+> 原文：[Introducing Pandas Objects](http://nbviewer.jupyter.org/github/donnemartin/data-science-ipython-notebooks/blob/master/pandas/03.01-Introducing-Pandas-Objects.ipynb)
+> 
+> 译者：[飞龙](https://github.com/wizardforcel)
+> 
+> 协议：[CC BY-NC-SA 4.0](http://creativecommons.org/licenses/by-nc-sa/4.0/)
+> 
+> 本节是[《Python 数据科学手册》](https://github.com/jakevdp/PythonDataScienceHandbook)（Python Data Science Handbook）的摘录。
+
+在最基本的层面上，Pandas 对象可以认为是 NumPy 结构化数组的增强版本，其中行和列用标签而不是简单的整数索引来标识。我们将在本章的过程中看到，Pandas 在基本数据结构之上提供了许多有用的工具，方法和功能，但几乎所有后续内容都需要了解这些结构是什么。因此，在我们继续之前，让我们介绍这三个基本的 Pandas 数据结构：``Series``，``DataFrame``和``Index``。
+
+我们将使用标准的 NumPy 和 Pandas 导入，来启动我们的代码会话：
+
+```py
+import numpy as np
+import pandas as pd
+```
+
+## Pandas 序列对象
+
+Pandas ``Series``是带索引的数据的一维数组。它可以从列表或数组创建，如下所示：
+
+```py
+data = pd.Series([0.25, 0.5, 0.75, 1.0])
+data
+
+'''
+0    0.25
+1    0.50
+2    0.75
+3    1.00
+dtype: float64
+'''
+```
+
+我们在输出中看到，``Series``包含了一系列值和一系列索引，我们可以使用``values``和``index``属性来访问它们。
+
+``values``只是一个熟悉的 NumPy 数组：
+
+```py
+data.values
+
+# array([ 0.25,  0.5 ,  0.75,  1.  ])
+```
+
+``index``是类型为``pd.Index``的数组式对象，我们将在稍后详细讨论。
+
+```py
+data.index
+
+# RangeIndex(start=0, stop=4, step=1)
+```
+
+与 NumPy 数组一样，可以通过熟悉的 Python 方括号表示法，按照相关索引访问数据：
+
+```py
+data[1]
+
+# 0.5
+
+data[1:3]
+
+'''
+1    0.50
+2    0.75
+dtype: float64
+'''
+```
+
+然而，我们将要看到，Pandas ``Series``比它模仿的一维 NumPy 数组更加通用和灵活。
+
+### 作为扩展的 NumPy 数组的``Series``
+
+从目前来看，``Series``对象看起来基本上可以与一维 NumPy 数组互换。本质区别在于索引的存在：虽然 Numpy 数组拥有隐式定义的整数索引，用于访问值，Pandas ``Series``拥有显式定义的索引，与值关联。
+
+这个显式索引的定义，为``Series``对象提供了额外的功能。例如，索引不必是整数，还可以包含任何所需类型的值。例如，如果我们愿意，我们可以使用字符串作为索引：
+
+```py
+data = pd.Series([0.25, 0.5, 0.75, 1.0],
+                 index=['a', 'b', 'c', 'd'])
+data
+
+'''
+a    0.25
+b    0.50
+c    0.75
+d    1.00
+dtype: float64
+'''
+```
+
+项目的访问像预期一样工作：
+
+```py
+data['b']
+
+# 0.5
+```
+
+我们甚至可以使用非连续的索引：
+
+```py
+data = pd.Series([0.25, 0.5, 0.75, 1.0],
+                 index=[2, 5, 3, 7])
+data
+
+'''
+2    0.25
+5    0.50
+3    0.75
+7    1.00
+dtype: float64
+'''
+
+data[5]
+
+# 0.5
+```
+
+### 作为特化字典的序列
+
+通过这种方式，你可以将 Pandas ``Series``视为 Python 字典的特化。字典是将任意键映射到一组任意值的结构，而``Series``是将类型化键映射到一组类型化值的结构。这种类型化很重要：正如 NumPy 数组后面的特定于类型的编译代码，使其在某些操作方面，比 Python 列表更高效，Pandas ``Series``的类型信息使其比 Python 字典更高效。
+
+通过直接从 Python 字典构造一个``Series``对象，可以使``Series``和字典的类比更加清晰：
+
+```py
+population_dict = {'California': 38332521,
+                   'Texas': 26448193,
+                   'New York': 19651127,
+                   'Florida': 19552860,
+                   'Illinois': 12882135}
+population = pd.Series(population_dict)
+population
+
+'''
+California    38332521
+Florida       19552860
+Illinois      12882135
+New York      19651127
+Texas         26448193
+dtype: int64
+'''
+```
+
+默认情况下，这将创建一个``Series``，其中索引是从有序键中提取的。从这里开始，我们可以执行典型的字典式的项目访问：
+
+```py
+population['California']
+
+# 38332521
+```
+
+但是，与字典不同，``Series``也支持数组式的操作，例如切片：
+
+```py
+population['California':'Illinois']
+
+'''
+California    38332521
+Florida       19552860
+Illinois      12882135
+dtype: int64
+'''
+```
+
+我们将在“数据索引和选择”中讨论 Pandas 索引和切片的一些怪异之处。
+
+### 构造序列对象
+
+我们已经看到了从头开始构建 Pandas ``Series``的几种方法；所有这些都是以下内容的某个版本：
+
+```py
+>>> pd.Series(data, index=index)
+```
+
+其中``index``是一个可选参数，``data``可以是许多实体之一。
+
+例如，``data``可以是列表或 NumPy 数组，在这种情况下``index``默认为整数序列：
+
+```py
+pd.Series([2, 4, 6])
+
+'''
+0    2
+1    4
+2    6
+dtype: int64
+'''
+```
+
+``data``可以是标量，被重复来填充指定的索引：
+
+```py
+pd.Series(5, index=[100, 200, 300])
+
+'''
+100    5
+200    5
+300    5
+dtype: int64
+'''
+```
+
+``data``可以是一个字典，其中``index``默认为有序的字典键：
+
+```py
+pd.Series({2:'a', 1:'b', 3:'c'})
+
+'''
+1    b
+2    a
+3    c
+dtype: object
+'''
+```
+
+在每种情况下，如果偏向不同的结果，则可以显式设置索引：
+
+```py
+pd.Series({2:'a', 1:'b', 3:'c'}, index=[3, 2])
+
+'''
+3    c
+2    a
+dtype: object
+'''
+```
+
+请注意，在这种情况下，``Series``仅仅由明确标识的键填充。
+
+## Pandas 数据帧对象
+
+Pandas 的下一个基本结构是``DataFrame``。与前一节中讨论的``Series``对象一样，``DataFrame``可以被认为是 NumPy 数组的扩展，也可以被认为是 Python 字典的特化。我们现在来看看这些观点。
+
+### 作为扩展的 NumPy 数组的``DataFrame``
+
+如果``Series``是具有灵活索引的一维数组的模拟，则``DataFrame``是具有灵活行索引和灵活列名的二维数组的模拟。正如你可能将二维数组视为对齐的一维列的有序序列一样，你可以将`DataFrame`视为对齐的``Series``对象的序列。在这里，“对齐”是指它们共享相同的索引。
+
+为了演示这一点，让我们首先构建一个新的``Series``，列出上一节讨论的五个州中的每个州的面积：
+
+```py
+area_dict = {'California': 423967, 'Texas': 695662, 'New York': 141297,
+             'Florida': 170312, 'Illinois': 149995}
+area = pd.Series(area_dict)
+area
+
+'''
+California    423967
+Florida       170312
+Illinois      149995
+New York      141297
+Texas         695662
+dtype: int64
+'''
+```
+
+现在我们已经有了它，以及之前的``population``序列，我们可以使用字典来构造包含这些信息的单个二维对象：
+
+```py
+states = pd.DataFrame({'population': population,
+                       'area': area})
+states
+```
+
+|  | area | population |
+| --- | --- | --- |
+| California | 423967 | 38332521 |
+| Florida | 170312 | 19552860 |
+| Illinois | 149995 | 12882135 |
+| New York | 141297 | 19651127 |
+| Texas | 695662 | 26448193 |
+
+就像``Series``对象一样，``DataFrame``有一个``index``属性，可以访问索引标签：
+
+```py
+states.index
+
+# Index(['California', 'Florida', 'Illinois', 'New York', 'Texas'], dtype='object')
+```
+
+另外，``DataFrame``有``columns``属性，它是一个包含列标签的``Index``对象：
+
+```py
+states.columns
+
+# Index(['area', 'population'], dtype='object')
+```
+
+因此，``DataFrame``可以认为是二维 NumPy 数组的扩展，其中行和列都具有用于访问数据的通用索引。
+
+### 作为特化字典的`DataFrame`
+
+同样，我们也可以将``DataFrame``视为字典的特化。
+字典将键映射到值，`DataFrame`将列名称映射到列数据的`Series`。例如，访问``'area'``属性会返回一个``Series``对象，其中包含我们之前看到的面积：
+
+```py
+states['area']
+
+'''
+California    423967
+Florida       170312
+Illinois      149995
+New York      141297
+Texas         695662
+Name: area, dtype: int64
+'''
+```
+
+注意这里潜在的混淆点：在一个二维 NumPy 数组中，``data[0]``将返回第一行。对于``DataFrame``，``data['col0']``将返回第一列。因此，最好将`DataFrame`视为扩展的字典而不是扩展的数组，尽管两种看待这个情况的方式都是实用的。我们将在“数据索引和选择”中，探索更灵活的索引`DataFrame`的方法。
+
+### 构造`DataFrame`对象
+
+Pandas ``DataFrame``可以通过多种方式构建。这里我们举几个例子。
+
+#### 来自单个`Series`对象
+
+``DataFrame``是``Series``对象的集合，单列``DataFrame``可以从单个``Series``构造：
+
+```py
+pd.DataFrame(population, columns=['population'])
+```
+
+|  | population |
+| --- | --- |
+| California | 38332521 |
+| Florida | 19552860 |
+| Illinois | 12882135 |
+| New York | 19651127 |
+| Texas | 26448193 |
+
+#### 来自字典的列表
+
+任何字典列表都可以制作成`DataFrame`。我们将使用简单的列表推导来创建一些数据：
+
+```py
+data = [{'a': i, 'b': 2 * i}
+        for i in range(3)]
+pd.DataFrame(data)
+```
+
+|  | a | b |
+| --- | --- | --- |
+| 0 | 0 | 0 |
+| 1 | 1 | 2 |
+| 2 | 2 | 4 |
+
+即使字典中的某些键丢失，Pandas 也会用`NaN`（即“非数字”）值填充它们：
+
+```py
+pd.DataFrame([{'a': 1, 'b': 2}, {'b': 3, 'c': 4}])
+```
+
+|  | a | b | c |
+| --- | --- | --- | --- |
+| 0 | 1.0 | 2 | NaN |
+| 1 | NaN | 3 | 4.0 |
+
+#### 来自序列对象的字典
+
+正如我们之前看到的那样，``DataFrame``也可以从``Series``对象的字典构造：
+
+```py
+pd.DataFrame({'population': population,
+              'area': area})
+```
+
+|  | area | population |
+| --- | --- | --- |
+| California | 423967 | 38332521 |
+| Florida | 170312 | 19552860 |
+| Illinois | 149995 | 12882135 |
+| New York | 141297 | 19651127 |
+| Texas | 695662 | 26448193 |
+
+#### 来自二维 NumPy 数组
+
+给定一个二维数据数组，我们可以创建一个``DataFrame``，带有任何指定列和索引名称。如果省略，将为每个使用整数索引：
+
+```py
+pd.DataFrame(np.random.rand(3, 2),
+             columns=['foo', 'bar'],
+             index=['a', 'b', 'c'])
+```
+
+|  | foo | bar |
+| --- | --- | --- |
+| a | 0.865257 | 0.213169 |
+| b | 0.442759 | 0.108267 |
+| c | 0.047110 | 0.905718 |
+
+#### 来自 NumPy 结构化数组
+
+我们在“结构化数据：NumPy 的结构化数组”中介绍了结构化数组。Pandas ``DataFrame``的原理与结构化数组非常相似，可以直接从它创建：
+
+```py
+A = np.zeros(3, dtype=[('A', 'i8'), ('B', 'f8')])
+A
+
+'''
+array([(0, 0.0), (0, 0.0), (0, 0.0)], 
+      dtype=[('A', '<i8'), ('B', '<f8')])
+'''
+
+pd.DataFrame(A)
+```
+
+|  | A | B |
+| --- | --- | --- |
+| 0 | 0 | 0.0 |
+| 1 | 0 | 0.0 |
+| 2 | 0 | 0.0 |
+
+## Pandas 索引对象
+
+我们在这里看到，``Series``和``DataFrame``对象都包含显式的索引，它允许你引用和修改数据。这个``Index``对象本身就是一个有趣的结构，它可以认为是不可变数组或有序集合（技术上是一个多值集合，因为``Index``对象可能包含重复的值）。
+
+这些观点在``Index``对象所提供的操作中，有一些有趣的结果。举个简单的例子，让我们从整数列表构造一个``Index``：
+
+```py
+ind = pd.Index([2, 3, 5, 7, 11])
+ind
+
+# Int64Index([2, 3, 5, 7, 11], dtype='int64')
+```
+
+### 作为不可变数组的索引
+
+``Index``在很多方面都像数组一样。例如，我们可以使用标准的 Python 索引表示法来检索值或切片：
+
+```py
+ind[1]
+
+# 3
+
+ind[::2]
+
+# Int64Index([2, 5, 11], dtype='int64')
+```
+
+``Index``对象也有许多来自 NumPy 数组的熟悉的属性：
+
+```py
+print(ind.size, ind.shape, ind.ndim, ind.dtype)
+
+# 5 (5,) 1 int64
+```
+
+``Index``对象和 NumPy 数组之间的一个区别是，索引是不可变的 - 也就是说，它们不能通过常规方式修改：
+
+```py
+ind[1] = 0
+
+'''
+---------------------------------------------------------------------------
+
+TypeError                                 Traceback (most recent call last)
+
+<ipython-input-34-40e631c82e8a> in <module>()
+----> 1 ind[1] = 0
+
+
+/Users/jakevdp/anaconda/lib/python3.5/site-packages/pandas/indexes/base.py in __setitem__(self, key, value)
+   1243 
+   1244     def __setitem__(self, key, value):
+-> 1245         raise TypeError("Index does not support mutable operations")
+   1246 
+   1247     def __getitem__(self, key):
+
+
+TypeError: Index does not support mutable operations
+'''
+```
+
+这种不变性使得，在多个`DataFrame`和数组之间共享索引更安全，避免了由无意的索引修改而导致的潜在的副作用。
+
+### 作为有序集合的索引
+
+Pandas 对象旨在促进一些操作，例如跨数据集的连接，这取决于集合运算的许多方面。``Index``对象遵循 Python 内置的``set``数据结构使用的许多约定，因此可以用熟悉的方式计算并集，交集，差集和其他组合：
+
+```py
+indA = pd.Index([1, 3, 5, 7, 9])
+indB = pd.Index([2, 3, 5, 7, 11])
+
+indA & indB  # 交集
+
+# Int64Index([3, 5, 7], dtype='int64')
+
+indA | indB  # 并集
+
+# Int64Index([1, 2, 3, 5, 7, 9, 11], dtype='int64')
+
+indA ^ indB  # 对称差集
+
+# Int64Index([1, 2, 9, 11], dtype='int64')
+```
+
+这些操作也可以通过对象方法访问，例如``indA.intersection(indB)``。
diff --git a/docs/7.5.md b/docs/7.5.md
new file mode 100644
index 0000000..e06e0c6
--- /dev/null
+++ b/docs/7.5.md
@@ -0,0 +1,437 @@
+# 7.5 数据索引和选择
+
+> 原文：[Data Indexing and Selection](https://nbviewer.jupyter.org/github/donnemartin/data-science-ipython-notebooks/blob/master/pandas/03.02-Data-Indexing-and-Selection.ipynb)
+> 
+> 译者：[飞龙](https://github.com/wizardforcel)
+> 
+> 协议：[CC BY-NC-SA 4.0](http://creativecommons.org/licenses/by-nc-sa/4.0/)
+> 
+> 本节是[《Python 数据科学手册》](https://github.com/jakevdp/PythonDataScienceHandbook)（Python Data Science Handbook）的摘录。
+
+在第二章中，我们详细介绍了在 NumPy 数组中访问，设置和修改值的方法和工具。这些包括索引（例如，``arr[2,1]``），切片（例如，``arr[:, 1:5]``），掩码（例如，``arr[arr > 0]`` ），花式索引（例如，``arr[0, [1, 5]]``）及其组合（例如，``arr[:, [1, 5]]``）。
+
+在这里，我们将看看在 Pandas ``Series``和``DataFrame``对象中，访问和修改值的类似方法。如果你使用过 NumPy 模式，Pandas 中的相应模式将会非常熟悉，尽管有一些需要注意的怪异之处。
+
+我们将从一维``Series``对象的简单情况开始，然后转向更复杂的二维``DataFrame``对象。
+
+## 序列中的数据选择
+
+我们在上一节中看到，``Series``对象在很多方面都像一维 NumPy 数组，并且在许多方面像标准的 Python 字典。如果我们记住这两个重叠的类比，它将帮助我们理解这些数组中的数据索引和选择的模式。
+
+### 作为字典的序列
+
+像字典一样，``Series``对象提供从一组键到一组值的映射：
+
+```py
+import pandas as pd
+data = pd.Series([0.25, 0.5, 0.75, 1.0],
+                 index=['a', 'b', 'c', 'd'])
+data
+
+'''
+a    0.25
+b    0.50
+c    0.75
+d    1.00
+dtype: float64
+'''
+
+data['b']
+
+# 0.5
+```
+
+我们还可以使用字典式的 Python 表达式和方法，来检查键/索引和值：
+
+```py
+'a' in data
+
+# True
+
+data.keys()
+
+# Index(['a', 'b', 'c', 'd'], dtype='object')
+
+list(data.items())
+
+# [('a', 0.25), ('b', 0.5), ('c', 0.75), ('d', 1.0)]
+```
+
+``Series``对象甚至可以用类字典语法来修改。就像你可以通过为新键赋值来扩展字典，你可以通过为新索引赋值来扩展``Series``：
+
+```py
+data['e'] = 1.25
+data
+
+'''
+a    0.25
+b    0.50
+c    0.75
+d    1.00
+e    1.25
+dtype: float64
+'''
+```
+
+对象的这种容易修改的特性，是一个方便的特性：在其背后，Pandas 正在决定可能需要执行的内存布局和数据复制；用户通常不需要担心这些问题。
+
+### 作为一维数组的序列
+
+``Series``建立在字典式接口之上，并通过与 NumPy 数组相同的基本机制，提供数组式的项目选择，即切片，掩码和花式索引。这些例子如下：
+
+```py
+# 按照显式下标来切片
+data['a':'c']
+
+'''
+a    0.25
+b    0.50
+c    0.75
+dtype: float64
+'''
+
+# 按照隐式下标来切片
+data[0:2]
+
+'''
+a    0.25
+b    0.50
+dtype: float64
+'''
+
+# 掩码
+data[(data > 0.3) & (data < 0.8)]
+
+'''
+b    0.50
+c    0.75
+dtype: float64
+'''
+
+# 花式索引
+data[['a', 'e']]
+
+'''
+a    0.25
+e    1.25
+dtype: float64
+'''
+```
+
+其中，切片可能是混乱的根源。注意，当使用显式索引进行切片时（即``data['a':'c']``），切片中包含最终索引，而在使用隐式索引进行切片时（即``data[0:2]``），最终索引从切片中排除。
+
+### 索引器：`loc`，`iloc`，和`ix`
+
+这些切片和索引惯例可能会引起混淆。例如，如果你的``Series``拥有显式的整数索引，那么索引操作如``data[1]``将使用显式索引，而切片操作如``data[1:3]``将使用隐式的 Python 风格索引。
+
+```py
+data = pd.Series(['a', 'b', 'c'], index=[1, 3, 5])
+data
+
+'''
+1    a
+3    b
+5    c
+dtype: object
+'''
+
+# 索引的时候是显式索引
+data[1]
+
+# 'a'
+
+# 切片的时候是隐式索引
+data[1:3]
+
+'''
+3    b
+5    c
+dtype: object
+'''
+```
+
+由于在整数索引的情况下存在这种潜在的混淆，Pandas 提供了一些特殊的索引器属性，这些属性明确地提供了特定的索引方案。这些不是函数方法而是属性，它们将特定切片接口提供给``Series``中的数组。
+
+首先，``loc``属性让索引和切片始终引用显式索引：
+
+```py
+data.loc[1]
+
+# 'a'
+
+data.loc[1:3]
+
+'''
+1    a
+3    b
+dtype: object
+'''
+```
+
+``iloc``属性让索引和切片始终引用隐式的 Python 风格索引：
+
+```py
+data.iloc[1]
+
+# 'b'
+
+data.iloc[1:3]
+
+'''
+3    b
+5    c
+dtype: object
+'''
+```
+
+第三个索引属性``ix``是两者的混合，对``Series``对象来说，相当于标准的``[]``风格的索引。在``DataFrame``对象的上下文中，``ix``索引器的目的将变得更加明显，我们将在稍后讨论。
+
+Python 代码的一个指导原则是“显式优于隐式”。``loc``和``iloc``的显式特性，使它们在维护清晰可读的代码时非常有用；特别是在整数索引的情况下，我建议使用这两者，来使代码更容易阅读和理解，并防止由于混合索引/切片约定而导致的细微错误。
+
+## 数据帧中的数据选择
+
+回想一下，``DataFrame``在很多方面都类似二维或结构化数组，在其它方面类似共享相同索引的``Series``结构的字典。在我们探索此结构中的数据选择时，记住这些类比是有帮助的。
+
+### 作为字典的数据帧
+
+我们将考虑的第一个类比是，``DataFrame``作为相关``Series``对象的字典。让我们回到我们的州人口和面积的例子：
+
+```py
+area = pd.Series({'California': 423967, 'Texas': 695662,
+                  'New York': 141297, 'Florida': 170312,
+                  'Illinois': 149995})
+pop = pd.Series({'California': 38332521, 'Texas': 26448193,
+                 'New York': 19651127, 'Florida': 19552860,
+                 'Illinois': 12882135})
+data = pd.DataFrame({'area':area, 'pop':pop})
+data
+```
+
+|  | area | pop |
+| --- | --- | --- |
+| California | 423967 | 38332521 |
+| Florida | 170312 | 19552860 |
+| Illinois | 149995 | 12882135 |
+| New York | 141297 | 19651127 |
+| Texas | 695662 | 26448193 |
+
+构成``DataFrame``列的单个``Series``，可以通过列名称的字典式索引来访问：
+
+```py
+data['area']
+
+'''
+California    423967
+Florida       170312
+Illinois      149995
+New York      141297
+Texas         695662
+Name: area, dtype: int64
+'''
+```
+
+同样，对于字符串形式的列名，我们可以使用属性风格来访问：
+
+```py
+data.area
+
+'''
+California    423967
+Florida       170312
+Illinois      149995
+New York      141297
+Texas         695662
+Name: area, dtype: int64
+'''
+```
+
+属性风格的列访问，与字典风格的访问，实际上访问了完全相同的对象：
+
+```py
+data.area is data['area']
+
+# True
+```
+
+虽然这是一个有用的简写，但请记住，它并不适用于所有情况！例如，如果列名不是字符串，或者列名与`DataFrame`的方法冲突，则无法进行属性风格的访问。例如，``DataFrame``有``pop()``方法，所以``data.pop``将指向它而不是``pop``列：
+
+```py
+data.pop is data['pop']
+
+# False
+```
+
+特别是，你应该避免尝试通过属性对列赋值（即使用``data['pop'] = z``而不是``data.pop = z``）。
+
+与前面讨论的``Series``对象一样，这种字典式语法也可用于修改对象，在这里添加一个新列：
+
+```py
+data['density'] = data['pop'] / data['area']
+data
+```
+
+|  | area | pop | density |
+| --- | --- | --- | --- |
+| California | 423967 | 38332521 | 90.413926 |
+| Florida | 170312 | 19552860 | 114.806121 |
+| Illinois | 149995 | 12882135 | 85.883763 |
+| New York | 141297 | 19651127 | 139.076746 |
+| Texas | 695662 | 26448193 | 38.018740 |
+
+这显示了``Series``对象之间的逐元素算术的直接语法；我们将在“使用 Pandas 中的数据进行操作”中深入研究它。
+
+### 作为二维数组的数据帧
+
+如前所述，我们还可以将``DataFrame``视为扩展的二维数组。我们可以使用``values``属性检查原始底层数据数组：
+
+```py
+data.values
+
+'''
+array([[  4.23967000e+05,   3.83325210e+07,   9.04139261e+01],
+       [  1.70312000e+05,   1.95528600e+07,   1.14806121e+02],
+       [  1.49995000e+05,   1.28821350e+07,   8.58837628e+01],
+       [  1.41297000e+05,   1.96511270e+07,   1.39076746e+02],
+       [  6.95662000e+05,   2.64481930e+07,   3.80187404e+01]])
+'''
+```
+
+考虑到这一点，许多熟悉的数组式操作，可以在``DataFrame``本身上执行。例如，我们可以转置完整的``DataFrame``来交换行和列：
+
+```py
+data.T
+```
+
+|  | California | Florida | Illinois | New York | Texas |
+| --- | --- | --- | --- | --- | --- |
+| area | 4.239670e+05 | 1.703120e+05 | 1.499950e+05 | 1.412970e+05 | 6.956620e+05 |
+| pop | 3.833252e+07 | 1.955286e+07 | 1.288214e+07 | 1.965113e+07 | 2.644819e+07 |
+| density | 9.041393e+01 | 1.148061e+02 | 8.588376e+01 | 1.390767e+02 | 3.801874e+01 |
+
+然而，当谈到`DataFrame`对象的索引时，很明显列的字典式索引，让我们不能将其简单地视为 NumPy 数组。特别是，将单个索引传递给数组会访问一行：
+
+```py
+data.values[0]
+
+'''
+array([  4.23967000e+05,   3.83325210e+07,   9.04139261e+01])
+'''
+```
+
+将单个“索引”传递给``DataFrame``会访问一列：
+
+```py
+data['area']
+
+'''
+California    423967
+Florida       170312
+Illinois      149995
+New York      141297
+Texas         695662
+Name: area, dtype: int64
+'''
+```
+
+因此，对于数组风格的索引，我们需要另一个惯例。在这里，Pandas 再次使用前面提到的``loc``，``iloc``和``ix``索引器。使用``iloc``索引器，我们可以索引底层数组，好像它是一个简单的 NumPy 数组（使用隐式的 Python 风格索引），但结果中保留了``DataFrame``索引和列标签：
+
+```py
+data.iloc[:3, :2]
+```
+
+|  | area | pop |
+| --- | --- | --- |
+| California | 423967 | 38332521 |
+| Florida | 170312 | 19552860 |
+| Illinois | 149995 | 12882135 |
+
+与之类似，使用``loc``索引器，我们可以用数组风格索引底层数据，但是使用显式索引和列名称：
+
+```py
+data.loc[:'Illinois', :'pop']
+```
+
+|  | area | pop |
+| --- | --- | --- |
+| California | 423967 | 38332521 |
+| Florida | 170312 | 19552860 |
+| Illinois | 149995 | 12882135 |
+
+``ix``索引器是这两种方法的混合（注意：``ix``自 Pandas 0.20 起已弃用，并在 1.0 版本中移除，在新版本中请改用``loc``或``iloc``）：
+
+```py
+data.ix[:3, :'pop']
+```
+
+|  | area | pop |
+| --- | --- | --- |
+| California | 423967 | 38332521 |
+| Florida | 170312 | 19552860 |
+| Illinois | 149995 | 12882135 |
+
+请记住，对于整数索引，``ix``索引器具有与整数索引的``Series``对象相同的潜在混淆。
+
+任何熟悉的 NumPy 风格的数据访问模式，都可以在这些索引器中使用。例如，在``loc``索引器中，我们可以组合掩码和花式索引，如下所示：
+
+```py
+data.loc[data.density > 100, ['pop', 'density']]
+```
+
+|  | pop | density |
+| --- | --- | --- |
+| Florida | 19552860 | 114.806121 |
+| New York | 19651127 | 139.076746 |
+
+这些索引惯例中的任何一种，也可用于设置或修改值；这可以用你在 NumPy 中习惯的标准方式来完成：
+
+```py
+data.iloc[0, 2] = 90
+data
+```
+
+|  | area | pop | density |
+| --- | --- | --- | --- |
+| California | 423967 | 38332521 | 90.000000 |
+| Florida | 170312 | 19552860 | 114.806121 |
+| Illinois | 149995 | 12882135 | 85.883763 |
+| New York | 141297 | 19651127 | 139.076746 |
+| Texas | 695662 | 26448193 | 38.018740 |
+
+为了提高你对 Pandas 数据操作的流畅性，我建议花一些时间使用简单的``DataFrame``，并探索各种索引方法所允许的索引，切片，掩码和花式索引。
+
+### 额外的索引惯例
+
+有一些额外的索引约定可能与前面的讨论不一致，但在实践中可能非常有用。首先，索引引用列，切片引用行：
+
+```py
+data['Florida':'Illinois']
+```
+
+|  | area | pop | density |
+| --- | --- | --- | --- |
+| Florida | 170312 | 19552860 | 114.806121 |
+| Illinois | 149995 | 12882135 | 85.883763 |
+
+这样的切片也可以通过数字而不是索引来引用行：
+
+```py
+data[1:3]
+```
+
+|  | area | pop | density |
+| --- | --- | --- | --- |
+| Florida | 170312 | 19552860 | 114.806121 |
+| Illinois | 149995 | 12882135 | 85.883763 |
+
+与之类似，直接掩码操作也是按行而不是按列解释的：
+
+```py
+data[data.density > 100]
+```
+
+|  | area | pop | density |
+| --- | --- | --- | --- |
+| Florida | 170312 | 19552860 | 114.806121 |
+| New York | 141297 | 19651127 | 139.076746 |
+
+这两个惯例在语法上类似于 NumPy 数组上的惯例，虽然这些惯例可能不完全符合 Pandas 惯例，但它们在实践中非常有用。
-- 
GitLab