diff --git a/Dockerfile b/Dockerfile
index 6ac9901ac6cea12e97047efdfb6272c957f166ae..60e76c7f2ede6beaca11659020d5991a75d5b741 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -22,7 +22,8 @@ COPY ./paddle/scripts/docker/root/ /root/
 
 RUN apt-get update && \
     apt-get install -y \
-    git python-pip python-dev openssh-server bison libnccl-dev \
+    git python-pip python-dev openssh-server bison \
+    libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 \
     wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
     curl sed grep graphviz libjpeg-dev zlib1g-dev  \
     python-matplotlib gcc-4.8 g++-4.8 \
diff --git a/doc/api/overview.rst b/doc/api/overview.rst
index 953d2db2b30c14c92c2d6d762294c3aebc6eda29..16b6cf42660c51feee09c689c671d5ef06663efb 100644
--- a/doc/api/overview.rst
+++ b/doc/api/overview.rst
@@ -1,4 +1,16 @@
-API Overview
-============
+V2 API Overview
+================
 
-TBD
+The PaddlePaddle V2 API is designed to provide a modern user interface for PaddlePaddle V1 (the original layer-based platform of PaddlePaddle).
+It introduces high-level concepts such as `Layers <http://www.paddlepaddle.org/docs/develop/api/en/v2/config/layer.html>`_ , `Optimizer <http://www.paddlepaddle.org/docs/develop/api/en/v2/config/optimizer.html>`_ , `Evaluator <http://www.paddlepaddle.org/docs/develop/api/en/v2/config/evaluators.html>`_ and `Data Reader <http://www.paddlepaddle.org/docs/develop/api/en/v2/data/data_reader.html>`_ to make model configuration more familiar to users.
+
+A model is composed of the computation described by a group of `Layers`, with `Evaluator` to define the error, `Optimizer` to update the parameters and `Data Reader` to feed in the data.
+
+We also provide the `interface for Training and Inference <http://www.paddlepaddle.org/docs/develop/api/en/v2/run_logic.html>`_ to help control the training and inference phases.
+It has several easy-to-use methods:
+
+- `paddle.train` 
+- `paddle.test`
+- `paddle.infer`
+
+To better expose the internal running details, different `events <http://www.paddlepaddle.org/docs/develop/api/en/v2/run_logic.html#event>`_ are made available to users, who can handle them by writing callbacks.
diff --git a/paddle/fluid/platform/nccl_test.cu b/paddle/fluid/platform/nccl_test.cu
index 212ea8517e897f86a3c19bb5d996c567854811a6..32a293796c09e5254c5eb48d11fa74617b3465ac 100644
--- a/paddle/fluid/platform/nccl_test.cu
+++ b/paddle/fluid/platform/nccl_test.cu
@@ -129,9 +129,6 @@ TEST(NCCL, all_reduce) {
 }  // namespace paddle
 
 int main(int argc, char** argv) {
-  // FIXME(tonyyang-svail):
-  //   Due to the driver issue on our CI, disable for now
-  return 0;
   dev_count = paddle::platform::GetCUDADeviceCount();
   if (dev_count <= 1) {
     LOG(WARNING)
diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh
index 56fa138786104df3b67cd5248d1625509cc913d1..8ec3d0c657400165c2225238f21facfb6c84df7c 100644
--- a/paddle/scripts/docker/build.sh
+++ b/paddle/scripts/docker/build.sh
@@ -171,7 +171,7 @@ EOF
 EOF
 
     if [[ ${WITH_GPU} == "ON"  ]]; then
-        NCCL_DEPS="apt-get install -y libnccl-dev &&"
+        NCCL_DEPS="apt-get install -y libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 &&"
     else
         NCCL_DEPS=""
     fi
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index e8b4cec6ee638b839e2a7c38e032f74b9cd738ef..3453dd945d558a93a854f99209a6ea8055875d84 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -1519,21 +1519,21 @@ def batch_norm(input,
     bias = helper.create_parameter(
         attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True)
 
-    mean = helper.create_global_variable(
-        name=moving_mean_name,
-        dtype=input.dtype,
+    mean = helper.create_parameter(
+        attr=ParamAttr(
+            name=moving_mean_name, initializer=Constant(0.0), trainable=False),
         shape=param_shape,
-        persistable=True,
-        stop_gradient=True)
-    helper.set_variable_initializer(var=mean, initializer=Constant(0.0))
+        dtype=input.dtype)
+    mean.stop_gradient = True
 
-    variance = helper.create_global_variable(
-        name=moving_variance_name,
-        dtype=input.dtype,
+    variance = helper.create_parameter(
+        attr=ParamAttr(
+            name=moving_variance_name,
+            initializer=Constant(1.0),
+            trainable=False),
         shape=param_shape,
-        persistable=True,
-        stop_gradient=True)
-    helper.set_variable_initializer(var=variance, initializer=Constant(1.0))
+        dtype=input.dtype)
+    variance.stop_gradient = True
 
     # create output
     # mean and mean_out share the same memory