21

e81332b9 · wizardforcel · dc81c6ac · e81332b9 · e81332b9
隐藏空白更改
内联并排

Showing with 64 additions and 64 deletions

20.md 20.md +49 -49

21.md 21.md +15 -15

未找到文件。
--- a/20.md
+++ b/20.md
@@ -8,7 +8,7 @@

 ## MatPlotLib 中的双向条形图

-```
+```py
 %matplotlib inline
 import pandas as pd
 import matplotlib.pyplot as plt
@@ -31,7 +31,7 @@ df
 | 3 | Jake | 2 | 62 | 23 |
 | 4 | Amy | 3 | 70 | 51 |

-```
+```py
 # 输入数据，特别是第二和
 # 第三行，跳过第一列
 x1 = df.ix[1, 1:]
@@ -84,7 +84,7 @@ plt.show()

 ## MatPlotLib 中的条形图

-```
+```py
 %matplotlib inline
 import pandas as pd
 import matplotlib.pyplot as plt
@@ -107,7 +107,7 @@ df
 | 3 | Jake | 2 | 62 | 23 |
 | 4 | Amy | 3 | 70 | 51 |

-```
+```py
 # 为每个变量创建得分均值的列表
 mean_values = [df['pre_score'].mean(), df['mid_score'].mean(), df['post_score'].mean()]

@@ -152,7 +152,7 @@ plt.show()

 ## Seaborn 中的调色板

-```
+```py
 import pandas as pd
 %matplotlib inline
 import matplotlib.pyplot as plt
@@ -177,139 +177,139 @@ sns.palplot(sns.color_palette("deep", 10))

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_5_0.png)

-```
+```py
 sns.palplot(sns.color_palette("muted", 10))
 ```

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_6_0.png)

-```
+```py
 sns.palplot(sns.color_palette("bright", 10))
 ```

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_7_0.png)

-```
+```py
 sns.palplot(sns.color_palette("dark", 10))
 ```

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_8_0.png)

-```
+```py
 sns.palplot(sns.color_palette("colorblind", 10))
 ```

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_9_0.png)

-```
+```py
 sns.palplot(sns.color_palette("Paired", 10))
 ```

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_10_0.png)

-```
+```py
 sns.palplot(sns.color_palette("BuGn", 10))
 ```

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_11_0.png)

-```
+```py
 sns.palplot(sns.color_palette("GnBu", 10))
 ```

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_12_0.png)

-```
+```py
 sns.palplot(sns.color_palette("OrRd", 10))
 ```

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_13_0.png)

-```
+```py
 sns.palplot(sns.color_palette("PuBu", 10))
 ```

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_14_0.png)

-```
+```py
 sns.palplot(sns.color_palette("YlGn", 10))
 ```

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_15_0.png)

-```
+```py
 sns.palplot(sns.color_palette("YlGnBu", 10))
 ```

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_16_0.png)

-```
+```py
 sns.palplot(sns.color_palette("YlOrBr", 10))
 ```

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_17_0.png)

-```
+```py
 sns.palplot(sns.color_palette("YlOrRd", 10))
 ```

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_18_0.png)

-```
+```py
 sns.palplot(sns.color_palette("BrBG", 10))
 ```

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_19_0.png)

-```
+```py
 sns.palplot(sns.color_palette("PiYG", 10))
 ```

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_20_0.png)

-```
+```py
 sns.palplot(sns.color_palette("PRGn", 10))
 ```

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_21_0.png)

-```
+```py
 sns.palplot(sns.color_palette("PuOr", 10))
 ```

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_22_0.png)

-```
+```py
 sns.palplot(sns.color_palette("RdBu", 10))
 ```

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_23_0.png)

-```
+```py
 sns.palplot(sns.color_palette("RdGy", 10))
 ```

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_24_0.png)

-```
+```py
 sns.palplot(sns.color_palette("RdYlBu", 10))
 ```

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_25_0.png)

-```
+```py
 sns.palplot(sns.color_palette("RdYlGn", 10))
 ```

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_26_0.png)

-```
+```py
 sns.palplot(sns.color_palette("Spectral", 10))
 ```

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_27_0.png)

-```
+```py
 # 创建调色板并将其设为当前调色板
 flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]
 sns.set_palette(flatui)
@@ -318,7 +318,7 @@ sns.palplot(sns.color_palette())

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes_29_0.png)

-```
+```py
 # 设置绘图颜色
 sns.tsplot([df.deaths_regiment_1, df.deaths_regiment_2, df.deaths_regiment_3, df.deaths_regiment_4,
            df.deaths_regiment_5, df.deaths_regiment_6, df.deaths_regiment_7], color="#34495e")
@@ -330,7 +330,7 @@ sns.tsplot([df.deaths_regiment_1, df.deaths_regiment_2, df.deaths_regiment_3, df

 ## 使用 Seaborn 和 pandas 创建时间序列绘图

-```
+```py
 import pandas as pd
 %matplotlib inline
 import matplotlib.pyplot as plt
@@ -357,7 +357,7 @@ sns.tsplot([df.deaths_regiment_1, df.deaths_regiment_2, df.deaths_regiment_3, df

 ![png](https://chrisalbon.com/python/data_visualization/seaborn_pandas_timeseries_plot_5_1.png)

-```
+```py
 # 带有置信区间误差条、但没有连线的时间序列绘图
 sns.tsplot([df.deaths_regiment_1, df.deaths_regiment_2, df.deaths_regiment_3, df.deaths_regiment_4,
            df.deaths_regiment_5, df.deaths_regiment_6, df.deaths_regiment_7], err_style="ci_bars", interpolate=False)
@@ -369,7 +369,7 @@ sns.tsplot([df.deaths_regiment_1, df.deaths_regiment_2, df.deaths_regiment_3, df

 ## 使用 Seaborn 创建散点图

-```
+```py
 import pandas as pd
 %matplotlib inline
 import random
@@ -397,7 +397,7 @@ df.head()
 | 3 | 510 | 206 | 1 | female |
 | 4 | 848 | 357 | 0 | female |

-```
+```py
 # 设置散点图样式
 sns.set_context("notebook", font_scale=1.1)
 sns.set_style("ticks")
@@ -427,7 +427,7 @@ plt.ylabel('Deaths')

 ## MatPlotLib 中的分组条形图

-```
+```py
 %matplotlib inline
 import pandas as pd
 import matplotlib.pyplot as plt
@@ -449,7 +449,7 @@ df
 | 3 | Jake | 2 | 62 | 23 |
 | 4 | Amy | 3 | 70 | 51 |

-```
+```py
 # 设置条形的位置和宽度
 pos = list(range(len(df['pre_score']))) 
 width = 0.25 
@@ -525,7 +525,7 @@ plt.show()

 ## MatPlotLib 中的直方图

-```
+```py
 %matplotlib inline
 import pandas as pd
 import matplotlib.pyplot as plt
@@ -551,7 +551,7 @@ df.head()
 | 3 | Battle of the Green Fork | 298 | 4 | Robb Stark | Joffrey/Tommen Baratheon | Stark | NaN | NaN | NaN | Lannister | NaN | NaN | NaN | loss | pitched battle | 1 | 1 | 18000 | 20000 | Roose Bolton, Wylis Manderly, Medger Cerwyn, H... | Tywin Lannister, Gregor Clegane, Kevan Lannist... | 1 | Green Fork | The Riverlands | NaN |
 | 4 | Battle of the Whispering Wood | 298 | 5 | Robb Stark | Joffrey/Tommen Baratheon | Stark | Tully | NaN | NaN | Lannister | NaN | NaN | NaN | win | ambush | 1 | 1 | 1875 | 6000 | Robb Stark, Brynden Tully | Jaime Lannister | 1 | Whispering Wood | The Riverlands | NaN |

-```
+```py
 # 制作攻击方和防守方大小的两个变量
 # 但是只保留攻击方规模小于 90000 的记录
 data1 = df['attacker_size'][df['attacker_size'] < 90000]
@@ -588,7 +588,7 @@ plt.show()

 ![png](https://chrisalbon.com/python/data_visualization/matplotlib_histogram_6_0.png)

-```
+```py
 # 制作攻击方和防守方大小的两个变量
 # 但是只保留攻击方规模小于 90000 的记录
 data1 = df['attacker_size'][df['attacker_size'] < 90000]
@@ -640,7 +640,7 @@ plt.show()

 ## 从 Pandas 数据帧生成 MatPlotLib 散点图

-```
+```py
 %matplotlib inline
 import pandas as pd
 import matplotlib.pyplot as plt
@@ -664,7 +664,7 @@ df
 | 3 | Jake | Milner | 24 | 0 | 2 | 62 |
 | 4 | Amy | Cooze | 73 | 1 | 3 | 70 |

-```
+```py
 # preTestScore 和 postTestScore 的散点图
 # 每个点的大小取决于年龄
 plt.scatter(df.preTestScore, df.postTestScore
@@ -675,7 +675,7 @@ plt.scatter(df.preTestScore, df.postTestScore

 ![png](https://chrisalbon.com/python/data_visualization/matplotlib_scatterplot_from_pandas_6_1.png)

-```
+```py
 # preTestScore 和 postTestScore 的散点图
 # 大小为 300，颜色取决于性别
 plt.scatter(df.preTestScore, df.postTestScore, s=300, c=df.female)
@@ -687,7 +687,7 @@ plt.scatter(df.preTestScore, df.postTestScore, s=300, c=df.female)

 ## Matplotlib 的简单示例

-```
+```py
 # 让 Jupyter 加载 matplotlib 
 # 并内联创建所有绘图（也就是在页面上）
 %matplotlib inline
@@ -703,7 +703,7 @@ pyplot.plot([1.6, 2.7])

 ## MatPlotLib 中的饼图

-```
+```py
 %matplotlib inline
 import pandas as pd
 import matplotlib.pyplot as plt
@@ -724,7 +724,7 @@ df
 | 3 | Jake | 2 | 62 | 23 |
 | 4 | Amy | 3 | 70 | 51 |

-```
+```py
 # 创建一列，其中包含每个官员的总逮捕数
 df['total_arrests'] = df['jan_arrests'] + df['feb_arrests'] + df['march_arrests']
 df
@@ -738,7 +738,7 @@ df
 | 3 | Jake | 2 | 62 | 23 | 87 |
 | 4 | Amy | 3 | 70 | 51 | 124 |

-```
+```py
 # （从 iWantHue）创建一列颜色
 colors = ["#E13F29", "#D69A80", "#D63B59", "#AE5552", "#CB5C3B", "#EB8076", "#96624E"]

@@ -772,7 +772,7 @@ plt.show()

 ## MatPlotLib 中的散点图

-```
+```py
 %matplotlib inline
 import pandas as pd
 import matplotlib.pyplot as plt
@@ -796,7 +796,7 @@ df.head()
 | 3 | Battle of the Green Fork | 298 | 4 | Robb Stark | Joffrey/Tommen Baratheon | Stark | NaN | NaN | NaN | Lannister | NaN | NaN | NaN | loss | pitched battle | 1.0 | 1.0 | 18000.0 | 20000.0 | Roose Bolton, Wylis Manderly, Medger Cerwyn, H... | Tywin Lannister, Gregor Clegane, Kevan Lannist... | 1.0 | Green Fork | The Riverlands | NaN |
 | 4 | Battle of the Whispering Wood | 298 | 5 | Robb Stark | Joffrey/Tommen Baratheon | Stark | Tully | NaN | NaN | Lannister | NaN | NaN | NaN | win | ambush | 1.0 | 1.0 | 1875.0 | 6000.0 | Robb Stark, Brynden Tully | Jaime Lannister | 1.0 | Whispering Wood | The Riverlands | NaN |

-```
+```py
 # 创建图形
 plt.figure(figsize=(10,8))

@@ -869,7 +869,7 @@ plt.show()

 ## MatPlotLib 中的栈式百分比条形图

-```
+```py
 %matplotlib inline
 import pandas as pd
 import matplotlib.pyplot as plt
@@ -890,7 +890,7 @@ df
 | 3 | Jake | 2 | 62 | 23 |
 | 4 | Amy | 3 | 70 | 51 |

-```
+```py
 # 创建带有一个子图的图形
 f, ax = plt.subplots(1, figsize=(10,5))


--- a/21.md
+++ b/21.md
@@ -20,7 +20,7 @@ $ s^2 = \frac {1}{n-1} \sum_{i=1}^n \left(x_i - \overline{x} \right)^ 2 $

 ## 演示中心极限定理

-```
+```py
 # 导入包
 import pandas as pd
 import numpy as np
@@ -43,7 +43,7 @@ population['numbers'].hist(bins=100)

 ![png](https://chrisalbon.com/statistics/frequentist/demonstrate_the_central_limit_theorem_5_1.png)

-```
+```py
 # 查看数值的均值
 population['numbers'].mean()

@@ -69,7 +69,7 @@ pd.Series(sampled_means).hist(bins=100)

 这是关键的图表，记住总体分布是均匀的，然而，这个分布接近正态。 这是中心极限定理的关键点，也是我们可以假设样本均值是无偏的原因。

-```
+```py
 # 查看 sampled_means 的均值
 pd.Series(sampled_means).mean()

@@ -88,7 +88,7 @@ print('The Mean Sample Mean is only %f different the True Population mean!' % er

 基于 [cbare](http://stackoverflow.com/users/199166/cbare) 的[这个](http://stackoverflow.com/a/17389980/2935984) StackOverflow 答案。

-```
+```py
 import statistics as stats

 x = [1,2,3,4,5,6,7,8,9]
@@ -100,7 +100,7 @@ $r={\frac {1}{n-1}}\sum_{i=1}^{n}\left({\frac {x_{i}-{\bar {x}}}{s_{x}}}\right)\

 其中 $s_{x}$ 和 $s_{y}$ 是 $x$ 和 $y$ 的标准差，$\left({\frac {x_{i}-{\bar {x}}}{s_{x}}}\right)$ 是 $x$ 的[标准得分](https://en.wikipedia.org/wiki/Standard_score)（$y$ 的标准得分同理）。

-```
+```py
 # 创建函数
 def pearson(x,y):

@@ -146,7 +146,7 @@ pearson(x,y)

 ## 概率质量函数（PMF）

-```
+```py
 # 加载库
 import matplotlib.pyplot as plt

@@ -181,7 +181,7 @@ plt.show()

 ## Spearman 排名相关度

-```
+```py
 import numpy as np
 import pandas as pd
 import scipy.stats
@@ -193,7 +193,7 @@ y = [2,1,2,4.5,7,6.5,6,9,9.5]

 Spearman 的排名相关度，是变量的排名版本的皮尔逊相关系数。

-```
+```py
 # 创建接受 x 和 y 的函数
 def spearmans_rank_correlation(xs, ys):

@@ -220,7 +220,7 @@ scipy.stats.spearmanr(x, y)[0]

 ## T 检验

-```
+```py
 from scipy import stats
 import numpy as np

@@ -237,7 +237,7 @@ y = np.random.normal(0, 1.5, 20)

 想象一下单样本 T 检验，并绘制一个“正态形状的”山丘，以`1`为中心，并以`1.5`为标准差而“展开”，然后在`0`处放置一个标志并查看标志在山丘上的位置。它靠近顶部吗？ 或者远离山丘？ 如果标志靠近山丘的底部或更远，则 t 检验的 p 值将低于`0.05`。

-```
+```py
 # 运行 T 检验来检验 x 的均值和 0 相比，是否有统计学显著的差异
 pvalue = stats.ttest_1samp(x, 0)[1]

@@ -251,7 +251,7 @@ pvalue

 想象一下单样本 T 检验，并绘制两个（正态形状的）山丘，分别以各自的均值为中心，其“平坦度”（个体延展度）由各自的标准差决定。 T 检验考察了两座山丘重叠的程度。 它们基本上是彼此覆盖的吗？ 山丘的底部只是勉强相接吗？ 如果山丘的尾部刚刚重叠或根本不重叠，则 t 检验的 p 值将低于 0.05。

-```
+```py
 stats.ttest_ind(x, y)[1]

 # 0.00035082056802728071 
@@ -265,7 +265,7 @@ stats.ttest_ind(x, y, equal_var=False)[1]

 当我们采集重复样本，并且想要考虑我们正在测试的两个分布是成对的这一事实时，使用配对 T 检验。

-```
+```py
 stats.ttest_rel(x, y)[1]

 # 0.00034222792790150386 
@@ -273,7 +273,7 @@ stats.ttest_rel(x, y)[1]

 ## 方差和标准差

-```
+```py
 # 导入包
 import math

@@ -289,7 +289,7 @@ $ \text{Sample Variance} = S_{n-1}^{2} = \frac{1}{n-1}\sum_{i=1}^{n}(x_i-\bar{x}

 其中 $n$ 是观测数，$\bar{x}$ 是观察值的平均值，$x_i-\bar{x}$ 是单个观察值减去数据均值。 请注意，如果我们根据来自该总体的样本估计总体的方差，我们应该使用第二个等式，将 $n$ 替换为 $n-1$。

-```
+```py
 # 计算 n
 n = len(data)

@@ -325,7 +325,7 @@ population_variance

 标准差就是方差的平方根。

-```
+```py
 # 计算总体方差的平方根
 population_standard_deviation = math.sqrt(population_variance)