diff --git a/doc/design/cluster_train/large_model_dist_train.md b/doc/design/cluster_train/large_model_dist_train.md
new file mode 100644
index 0000000000000000000000000000000000000000..0c4b5bc24c854b7062d509249bea9c50d42bd5f1
--- /dev/null
+++ b/doc/design/cluster_train/large_model_dist_train.md
@@ -0,0 +1,101 @@
+# Alalysis of large model distributed training in Paddle
+
+***NOTE: This is only some note for how we implemeted this scheme in V1, not a new design.***
+
+## What is it
+
+We often encounter cases that the embedding layer parameters(sparse) are so large that we can not store it in the trainer's memory when training. So we need to put them to several servers, and fetch them row by row instead of fetch all of the parameters.
+
+## How to use
+
+Specify command-line argument like  `--loadsave_parameters_in_pserver=true --ports_num_for_sparse=1  --use_old_updater=1` when starting the paddle trainer. And also add something like `--ports_num_for_sparse=1 --pserver_num_threads=5` when starting pserver processes.
+
+Accrodingly, configure your embedding layers like:
+
+```python
+SPARSE_REMOTE=True
+
+w1 = data_layer(name="w1", size=dict_size)
+emb1 = embedding_layer(input=w1, size=32, param_attr=ParameterAttribute(sparse_update=SPARSE_REMOTE))
+w2 = data_layer(name="w2", size=dict_size)
+emb2 = embedding_layer(input=w2, size=32, param_attr=ParameterAttribute(sparse_update=SPARSE_REMOTE))
+...
+```
+
+## Implementation details
+
+```c++
+enum MatType {
+  MAT_NORMAL,
+  MAT_NORMAL_SHARED,
+  MAT_VALUE_SHARED,
+  MAT_SPARSE_ROW_IDS,
+  MAT_SPARSE_ROW_AUTO_GROW,
+  MAT_CACHE_ROW,
+  MAT_SPARSE_ROW,
+  MAT_SPARSE_ROW_PREFETCH,
+  MAT_SPARSE_ROW_PREFETCH_FULL_SIZE,
+};
+```
+
+`MAT_SPARSE_ROW_PREFETCH` is what we use when configured to fetch only row of matrix when training.
+
+In `trainer_internal.cpp:L93 trainOneBatch`:
+
+```c++
+  if (config_->getOptConfig().use_sparse_remote_updater()) {
+    REGISTER_TIMER("prefetch");
+    gradientMachine_->prefetch(inArgs);
+    parameterUpdater_->getParametersRemote();
+  }
+```
+
+When doing actual network forward and backward, at the beginning of each batch, the trainer will try to download one row of data from pserver.
+
+In `trainer/RemoteParameterUpdater.cpp`: `parameterUpdater_->getParametersRemote();`:
+
+```c++
+if (fullSize) {
+    ...
+} else {
+getParams = [&] {
+    parameterClient_->getParameterSparse(
+        /* recvParameterType= */ PARAMETER_VALUE, sendBackParameterType);
+};
+applyL1 = [](Parameter& para, real decayRate) {
+    para.getMat(PARAMETER_VALUE)->applyL1(/*lr=*/1.0f, decayRate);
+};
+}
+```
+
+Calling `parameterClient_->getParameterSparse` will do remote call to pserver's `getParameterSparse`:
+
+```c++
+void ParameterServer2::getParameterSparse(const SendParameterRequest& request,
+                                          std::vector<Buffer>& inputBuffers,
+                                          SendParameterResponse* response,
+                                          std::vector<Buffer>* outputBuffers) {
+  (void)inputBuffers;
+  auto& buffer = *readWriteBuffer_;
+  size_t numReals = 0;
+  for (const auto& block : request.blocks()) {
+    numReals += getParameterConfig(block).dims(1);
+  }
+  buffer.resize(numReals);
+
+  VLOG(3) << "pserver: getParameterSparse, numReals=" << numReals;
+
+  ReadLockGuard guard(parameterMutex_);
+  size_t offset = 0;
+  for (const auto& block : request.blocks()) {
+    size_t width = getParameterConfig(block).dims(1);
+    Buffer buf = {buffer.data() + offset, width};
+    int type = request.send_back_parameter_type();
+    sendBackParameterSparse(block, type, response, &buf, width, outputBuffers);
+    offset += width;
+  }
+}
+```
+
+`getParameterConfig(block).dims(1)` returns the width of the current "parameter block"(a shard of parameter object),
+then `getParameterSparse` remote call returns only one row of data to the client.
diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh
index 2f0205b7702b6d73b5348430f39166ec78f6c143..6c2f5fed405ecc80d5388084c7774d0ffb45f9af 100644
--- a/paddle/scripts/docker/build.sh
+++ b/paddle/scripts/docker/build.sh
@@ -82,10 +82,6 @@ EOF
 fi
 
 
-# To build documentation, we need to run cmake again after installing
-# PaddlePaddle.  This awkwardness is due to
-# https://github.com/PaddlePaddle/Paddle/issues/1854.  It also
-# describes a solution.
 if [[ ${WITH_DOC:-OFF} == "ON" ]]; then
     cat <<EOF
 ========================================
@@ -93,11 +89,6 @@ Building documentation ...
    In /paddle/build_doc
 ========================================
 EOF
-    # build documentation need install Paddle before
-    make install -j `nproc`
-    pip install /usr/local/opt/paddle/share/wheels/*.whl
-    paddle version
-
     mkdir -p /paddle/build_doc
     pushd /paddle/build_doc
     cmake .. \
@@ -106,7 +97,8 @@ EOF
           -DWITH_AVX=${WITH_AVX:-ON} \
           -DWITH_SWIG_PY=ON \
           -DWITH_STYLE_CHECK=OFF
-    make paddle_docs paddle_docs_cn
+    make -j `nproc` gen_proto_py
+    make -j `nproc` paddle_docs paddle_docs_cn
     popd
 fi
 
@@ -182,3 +174,7 @@ ADD go/cmd/master/master /usr/bin/
 # default command shows the paddle version and exit
 CMD ["paddle", "version"]
 EOF
+
+set +xe
+printf "If you need to install PaddlePaddle in develop docker image,"
+printf "please make install or pip install build/python/dist/*.whl.\n"
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index d2f064bea00ff55f29df9269c2aab7619f2a94c5..7bd6d59b0096c23bb791b9b50702130057628879 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -50,8 +50,11 @@ add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
     COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_PYTHON_BUILD_DIR}/lib* ${PADDLE_PYTHON_BUILD_DIR}/lib-python
     DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER})
 
-add_custom_target(paddle_python ALL DEPENDS
-    ${PADDLE_PYTHON_BUILD_DIR}/.timestamp paddle_pserver_main paddle_trainer paddle_merge_model python_api_wheel ${MKL_DEPENDS})
+set(paddle_python_deps ${PADDLE_PYTHON_BUILD_DIR}/.timestamp paddle_pserver_main paddle_trainer paddle_merge_model ${MKL_DEPENDS})
+if(WITH_SWIG_PY)
+    list(APPEND paddle_python_deps python_api_wheel)
+endif()
+add_custom_target(paddle_python ALL DEPENDS ${paddle_python_deps})
 
 set(PADDLE_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/)