diff --git "a/docs/4.\350\256\255\347\273\203\346\250\241\345\236\213.md" "b/docs/4.\350\256\255\347\273\203\346\250\241\345\236\213.md" index a2165fa9ee79badc1a8f993b0684671ba46e8df6..00a15c7f533e767345c902e38e3d95d96448b5bf 100644 --- "a/docs/4.\350\256\255\347\273\203\346\250\241\345\236\213.md" +++ "b/docs/4.\350\256\255\347\273\203\346\250\241\345\236\213.md" @@ -84,7 +84,7 @@ y = 4 + 3 * X + np.random.randn(100, 1) ```python X_b = np.c_[np.ones((100, 1)), X] -theta_best = np.linalg.inv(X_b.T.dot(X_B)).dot(X_b.T).dot(y) +theta_best = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y) ``` 我们生产数据的函数实际上是 ![y = 4 + 3x_0 + 高斯噪声](../images/tex-751d6173162c5bb7b6294ca57e03d5b1.gif)。让我们看一下最后的计算结果。 @@ -101,7 +101,7 @@ array([[4.21509616],[2.77011339]]) ```python >>> X_new = np.array([[0],[2]]) >>> X_new_b = np.c_[np.ones((2, 1)), X_new] ->>> y_predict = X_new_b.dot(theta.best) +>>> y_predict = X_new_b.dot(theta_best) >>> y_predict array([[4.21509616],[9.75532293]]) ``` @@ -226,7 +226,7 @@ theta = np.random.randn(2,1) # 随机初始值 for iteration in range(n_iterations): gradients = 2/m * X_b.T.dot(X_b.dot(theta) - y) - theta = theta - eta * gradiens + theta = theta - eta * gradients ``` 这不是太难,让我们看一下最后的结果 ![\theta](../images/tex-2554a2bb846cffd697389e5dc8912759.gif): @@ -282,9 +282,9 @@ for epoch in range(n_epochs): random_index = np.random.randint(m) xi = X_b[random_index:random_index+1] yi = y[random_index:random_index+1] - gradients = 2 * xi.T.dot(xi,dot(theta)-yi) + gradients = 2 * xi.T.dot(xi.dot(theta)-yi) eta = learning_schedule(epoch * m + i) - theta = theta - eta * gradiens + theta = theta - eta * gradients ``` 按习惯来讲,我们进行 ![m](../images/tex-6f8f57715090da2632453988d9a1501b.gif) 轮的迭代,每一轮迭代被称为一代。在整个训练集上,随机梯度下降迭代了 1000 次时,一般在第 50 次的时候就可以达到一个比较好的结果。 @@ -306,7 +306,7 @@ array([[4.21076011],[2.748560791]]) ```python from sklearn.linear_model import SGDRegressor -sgd_reg + SGDRregressor(n_iter=50, penalty=None, eta0=0.1) +sgd_reg = SGDRegressor(n_iter=50, penalty=None, eta0=0.1) sgd_reg.fit(X,y.ravel()) ```