Commit 3569466d authored by yi.wu

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into dockerreadme

.gitignore
*.DS_Store
build/
*.user
.vscode
.idea
.project
.cproject
.pydevproject
Makefile
.test_env/
third_party/
*~
bazel-*
!build/*.deb
@@ -3,20 +3,17 @@
 FROM nvidia/cuda:7.5-cudnn5-devel-ubuntu14.04
 MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
-ARG DEBIAN_FRONTEND=noninteractive
 ARG UBUNTU_MIRROR
 RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'
 # ENV variables
 ARG BUILD_WOBOQ
-ARG BUILD_AND_INSTALL
 ARG WITH_GPU
 ARG WITH_AVX
 ARG WITH_DOC
 ARG WITH_STYLE_CHECK
 ENV BUILD_WOBOQ=${BUILD_WOBOQ:-OFF}
-ENV BUILD_AND_INSTALL=${BUILD_AND_INSTALL:-OFF}
 ENV WITH_GPU=${WITH_AVX:-OFF}
 ENV WITH_AVX=${WITH_AVX:-ON}
 ENV WITH_DOC=${WITH_DOC:-OFF}
@@ -31,7 +28,7 @@ RUN apt-get update && \
     apt-get install -y wget unzip tar xz-utils bzip2 gzip coreutils && \
     apt-get install -y curl sed grep graphviz libjpeg-dev zlib1g-dev && \
     apt-get install -y python-numpy python-matplotlib gcc g++ gfortran && \
-    apt-get install -y automake locales clang-format-3.8 && \
+    apt-get install -y automake locales clang-format-3.8 swig && \
     apt-get clean -y
 # git credential to skip password typing
@@ -51,8 +48,6 @@ RUN curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \
     cd cmake-3.4.1 && ./bootstrap && make -j `nproc` && make install && \
     cd .. && rm -rf cmake-3.4.1
-RUN apt-get install -y swig
 VOLUME ["/usr/share/nginx/html/data", "/usr/share/nginx/html/paddle"]
 # Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service
......
@@ -2,8 +2,8 @@
 [![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
-[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/)
+[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/develop/doc/)
-[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/cn/index.html)
+[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/doc_cn/)
 [![Coverage Status](https://coveralls.io/repos/github/PaddlePaddle/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop)
 [![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
 [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
@@ -59,36 +59,36 @@ Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddl
 the capability of PaddlePaddle to make a huge impact for your product.

 ## Installation
-Check out the [Install Guide](http://paddlepaddle.org/doc/build/) to install from
-pre-built packages (**docker image**, **deb package**) or
-directly build on **Linux** and **Mac OS X** from the source code.
+It is recommended to check out the
+[Docker installation guide](http://www.paddlepaddle.org/develop/doc/getstarted/build_and_install/docker_install_en.html)
+before looking into the
+[build from source guide](http://www.paddlepaddle.org/develop/doc/getstarted/build_and_install/build_from_source_en.html)

 ## Documentation
-Both [English Docs](http://paddlepaddle.org/doc/) and [Chinese Docs](http://paddlepaddle.org/doc_cn/) are provided for our users and developers.
+We provide [English](http://www.paddlepaddle.org/develop/doc/) and
+[Chinese](http://www.paddlepaddle.org/doc_cn/) documentation.

-- [Quick Start](http://paddlepaddle.org/doc/demo/quick_start/index_en) <br>
-  You can follow the quick start tutorial to learn how to use PaddlePaddle
-  step-by-step.
+- [Deep Learning 101](http://book.paddlepaddle.org/index.en.html)
+
+  You might want to start from this online interactive book that can run in a Jupyter Notebook.

-- [Example and Demo](http://paddlepaddle.org/doc/demo/) <br>
-  We provide five demos, including: image classification, sentiment analysis,
-  sequence to sequence model, recommendation, semantic role labeling.
+- [Distributed Training](http://www.paddlepaddle.org/develop/doc/howto/usage/cluster/cluster_train_en.html)
+
+  You can run distributed training jobs on MPI clusters.

-- [Distributed Training](http://paddlepaddle.org/doc/cluster) <br>
-  This system supports training deep learning models on multiple machines
-  with data parallelism.
+- [Distributed Training on Kubernetes](http://www.paddlepaddle.org/develop/doc/howto/usage/k8s/k8s_en.html)
+
+  You can also run distributed training jobs on Kubernetes clusters.

-- [Python API](http://paddlepaddle.org/doc/ui/) <br>
-  PaddlePaddle supports using either Python interface or C++ to build your
-  system. We also use SWIG to wrap C++ source code to create a user friendly
-  interface for Python. You can also use SWIG to create interface for your
-  favorite programming language.
+- [Python API](http://www.paddlepaddle.org/develop/doc/api/index_en.html)
+
+  Our new API enables much shorter programs.

-- [How to Contribute](http://paddlepaddle.org/doc/build/contribute_to_paddle.html) <br>
-  We sincerely appreciate your interest and contributions. If you would like to
-  contribute, please read the contribution guide.
+- [How to Contribute](http://www.paddlepaddle.org/develop/doc/howto/dev/contribute_to_paddle_en.html)
+
+  We appreciate your contributions!

-- [Source Code Documents](http://paddlepaddle.org/doc/source/) <br>

 ## Ask Questions
......
@@ -72,7 +72,7 @@ function( Sphinx_add_target target_name builder conf cache source destination )
     ${source}
     ${destination}
     COMMENT "Generating sphinx documentation: ${builder}"
-    COMMAND cd ${destination} && ln -s ./index_*.html index.html
+    COMMAND cd ${destination} && ln -sf ./index_*.html index.html
   )

   set_property(
......
@@ -164,15 +164,6 @@ public:
     argu.value = value;
     data_.push_back(argu);
   }
-  /**
-   * @brief Append user defined data
-   * @param[in] ptr user defined data
-   */
-  void appendUserDefinedPtr(UserDefinedVectorPtr ptr) {
-    Argument argu;
-    argu.udp = ptr;
-    data_.push_back(argu);
-  }

   /*
    * @brief Append argument
......
@@ -192,6 +192,59 @@ void SumOfSquaresCostLayer::backwardImp(Matrix& output,
   outputG.sumOfSquaresBp(output, *label.value);
 }

+//
+// class SmoothL1CostLayer
+//
+
+REGISTER_LAYER(smooth_l1, SmoothL1CostLayer);
+
+bool SmoothL1CostLayer::init(const LayerMap& layerMap,
+                             const ParameterMap& parameterMap) {
+  return CostLayer::init(layerMap, parameterMap);
+}
+
+void SmoothL1CostLayer::forwardImp(Matrix& output,
+                                   Argument& label,
+                                   Matrix& target) {
+  MatrixPtr targetCpu, outputCpu, labelCpu;
+  if (useGpu_) {
+    // compute on CPU copies, then copy the result back into the GPU matrix
+    targetCpu =
+        Matrix::create(target.getHeight(), target.getWidth(), false, false);
+    outputCpu =
+        Matrix::create(output.getHeight(), output.getWidth(), false, false);
+    labelCpu = Matrix::create(
+        label.value->getHeight(), label.value->getWidth(), false, false);
+    targetCpu->copyFrom(target);
+    outputCpu->copyFrom(output);
+    labelCpu->copyFrom(*label.value);
+    targetCpu->smoothL1(*outputCpu, *labelCpu);
+    target.copyFrom(*targetCpu);
+  } else {
+    target.smoothL1(output, *label.value);
+  }
+}
+
+void SmoothL1CostLayer::backwardImp(Matrix& output,
+                                    Argument& label,
+                                    Matrix& outputG) {
+  MatrixPtr outputGCpu, outputCpu, labelCpu;
+  if (useGpu_) {
+    outputGCpu =
+        Matrix::create(outputG.getHeight(), outputG.getWidth(), false, false);
+    outputCpu =
+        Matrix::create(output.getHeight(), output.getWidth(), false, false);
+    labelCpu = Matrix::create(
+        label.value->getHeight(), label.value->getWidth(), false, false);
+    outputGCpu->copyFrom(outputG);
+    outputCpu->copyFrom(output);
+    labelCpu->copyFrom(*label.value);
+    outputGCpu->smoothL1Bp(*outputCpu, *labelCpu);
+    outputG.copyFrom(*outputGCpu);
+  } else {
+    outputG.smoothL1Bp(output, *label.value);
+  }
+}
+
 //
 // class RankingCost
 //
......
@@ -159,6 +159,29 @@ public:
                    Matrix& outputGrad) override;
 };

+/**
+ * This cost layer computes a smooth L1 loss for real-valued regression
+ * tasks.
+ * \f[
+ * L = \begin{cases}
+ * 0.5 (output - label)^2, & \text{if } |output - label| < 1 \\
+ * |output - label| - 0.5, & \text{otherwise}
+ * \end{cases}
+ * \f]
+ */
+class SmoothL1CostLayer : public CostLayer {
+public:
+  explicit SmoothL1CostLayer(const LayerConfig& config) : CostLayer(config) {}
+
+  bool init(const LayerMap& layerMap,
+            const ParameterMap& parameterMap) override;
+
+  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;
+
+  void backwardImp(Matrix& outputValue,
+                   Argument& label,
+                   Matrix& outputGrad) override;
+};
+
 /**
  * A cost layer for learning to rank (LTR) task. This layer contains at least
  * three inputs.
......
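To make the piecewise loss above concrete, the following standalone NumPy sketch mirrors the forward and backward math that the new layer implements. This is an illustration, not part of the commit; the function names are ours.

```python
import numpy as np

def smooth_l1_forward(output, label):
    # elementwise smooth L1: 0.5 * d^2 where |d| < 1, |d| - 0.5 elsewhere
    d = np.abs(output - label)
    return np.where(d < 1.0, 0.5 * d * d, d - 0.5)

def smooth_l1_backward(output, label):
    # gradient: d where |d| < 1, sign(d) elsewhere
    d = output - label
    return np.where(np.abs(d) < 1.0, d, np.sign(d))

out = np.array([0.2, 1.8, -0.4])
lbl = np.zeros(3)
print(smooth_l1_forward(out, lbl))   # [0.02, 1.3, 0.08]
print(smooth_l1_backward(out, lbl))  # [0.2, 1.0, -0.4]
```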
@@ -56,17 +56,16 @@ void SequencePoolLayer::forward(PassType passType) {
   CHECK_EQ(newBatchSize_, starts->getSize() - 1);

   resetOutput(newBatchSize_, dim);
-  if (type_) {
-    CHECK(input.subSequenceStartPositions)
-        << "when trans_type = seq, input must hasSubseq";
-  }

   /* If type_ = kNonSeq, both seq has or not has sub-seq degrade to a non-seq,
    * thus, in this case, output_ has no sequenceStartPositions.
    * If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
    * case, we should compute the new sequenceStartPositions.
    */
   if (type_) {
-    output_.degradeSequence(input, useGpu_);
+    CHECK(input.subSequenceStartPositions)
+        << "when trans_type = seq, input must hasSubseq";
+    output_.degradeSequence(input);
   }
 }
......
@@ -1602,6 +1602,20 @@ TEST(Layer, PadLayer) {
   }
 }

+TEST(Layer, smooth_l1) {
+  TestConfig config;
+  config.layerConfig.set_type("smooth_l1");
+
+  config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0});
+  config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 1, 0});
+  config.layerConfig.add_inputs();
+  config.layerConfig.add_inputs();
+
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config, "smooth_l1", 100, false, useGpu, false, 2.0);
+  }
+}
+
 int main(int argc, char** argv) {
   testing::InitGoogleTest(&argc, argv);
   initMain(argc, argv);
......
@@ -3590,6 +3590,55 @@ void CpuMatrix::sumOfSquaresBp(Matrix& output, Matrix& label) {
   }
 }

+void CpuMatrix::smoothL1(Matrix& output, Matrix& label) {
+  CHECK(output.useGpu_ == false && label.useGpu_ == false)
+      << "Matrix type are not equal";
+
+  size_t numSamples = getHeight();
+  size_t dim = output.getWidth();
+  CHECK_EQ(label.getHeight(), numSamples);
+  CHECK_EQ(output.getHeight(), numSamples);
+  CHECK_EQ(label.getWidth(), dim);
+  CHECK_EQ(getWidth(), (size_t)1);
+
+  real* out = output.getData();
+  real* cost = getData();
+  real* lbl = label.getData();
+
+  for (size_t i = 0; i < numSamples; ++i, out += dim, cost += dim, lbl += dim) {
+    for (size_t j = 0; j < dim; ++j) {
+      cost[j] = std::fabs(out[j] - lbl[j]);
+      if (cost[j] < 1.0)
+        cost[j] = 0.5 * cost[j] * cost[j];
+      else
+        cost[j] = cost[j] - 0.5;
+    }
+  }
+}
+
+void CpuMatrix::smoothL1Bp(Matrix& output, Matrix& label) {
+  CHECK(output.useGpu_ == false && label.useGpu_ == false)
+      << "Matrix type are not equal";
+
+  size_t numSamples = getHeight();
+  size_t dim = output.getWidth();
+  CHECK_EQ(label.getHeight(), numSamples);
+  CHECK_EQ(output.getHeight(), numSamples);
+  CHECK_EQ(label.getWidth(), dim);
+  CHECK_EQ(getWidth(), (size_t)1);
+
+  real* out = output.getData();
+  real* cost = getData();
+  real* lbl = label.getData();
+
+  // f'(x) = x if |x| < 1
+  //       = sign(x) otherwise
+  for (size_t i = 0; i < numSamples; ++i, out += dim, cost += dim, lbl += dim) {
+    for (size_t j = 0; j < dim; ++j) {
+      cost[j] = out[j] - lbl[j];
+      if (std::fabs(cost[j]) >= 1) cost[j] = (0 < cost[j]) - (cost[j] < 0);
+    }
+  }
+}
+
 void CpuMatrix::tanh(Matrix& output) {
   CHECK(isContiguous());
   CHECK(output.isContiguous());
......
@@ -783,6 +783,14 @@ public:
     LOG(FATAL) << "Not implemented";
   }

+  virtual void smoothL1(Matrix& output, Matrix& label) {
+    LOG(FATAL) << "Not implemented";
+  }
+
+  virtual void smoothL1Bp(Matrix& outputV, Matrix& label) {
+    LOG(FATAL) << "Not implemented";
+  }
+
   virtual void tanh(Matrix& output) { LOG(FATAL) << "Not implemented"; }

   virtual void tanhDerivative(Matrix& output) {
@@ -1720,6 +1728,9 @@ public:
   /// gradient of sumOfSquares.
   void sumOfSquaresBp(Matrix& outputV, Matrix& label);

+  void smoothL1(Matrix& output, Matrix& label);
+  void smoothL1Bp(Matrix& output, Matrix& label);
+
   void tanh(Matrix& output);
   void tanhDerivative(Matrix& output);
......
@@ -123,46 +123,6 @@ static void resizeAndCopy(ICpuGpuVectorPtr& dest,
   }
 }

-static void resizeAndCopy(UserDefinedVectorPtr& dest,
-                          const UserDefinedVectorPtr& src,
-                          bool useGpu,
-                          hl_stream_t stream) {
-  if (src) {
-    CHECK(!useGpu) << "not implemented";
-    size_t height = src->size();
-    if (!dest) {
-      dest = std::make_shared<std::vector<void*>>(height);
-    } else {
-      dest->resize(height);
-    }
-    std::copy_n(src->begin(), height, dest->begin());
-  } else {
-    dest.reset();
-  }
-}
-
-static void resizeAndCopy(UserDefinedVectorPtr& dest,
-                          const UserDefinedVectorPtr& src,
-                          int32_t startPos,
-                          int32_t copySize,
-                          bool useGpu,
-                          hl_stream_t stream = HPPL_STREAM_DEFAULT) {
-  if (src) {
-    CHECK(!useGpu) << "not implemented";
-    CHECK_LE((size_t)startPos + copySize, src->size());
-    size_t height = copySize;
-    if (!dest) {
-      dest = std::make_shared<std::vector<void*>>(height);
-    } else {
-      dest->resize(height);
-    }
-    std::copy_n(src->begin() + startPos, height, dest->begin());
-  } else {
-    dest.reset();
-  }
-}
-
 static void resizeAndCopy(SVectorPtr& dest,
                           const SVectorPtr& src,
                           bool useGpu,
@@ -223,7 +183,6 @@ void Argument::resizeAndCopyFrom(const Argument& src,
                   false /* useGpu */,
                   stream);
   }
-  resizeAndCopy(udp, src.udp, useGpu, stream);
   resizeAndCopy(strs, src.strs, useGpu, stream);
   frameWidth = src.frameWidth;
   frameHeight = src.frameHeight;
@@ -255,7 +214,6 @@ int32_t Argument::resizeAndCopyFrom(const Argument& src,
     resizeAndCopy(value, src.value, startRow, copySize, useGpu, stream);
     resizeAndCopy(grad, src.grad, startRow, copySize, useGpu, stream);
     resizeAndCopy(ids, src.ids, startRow, copySize, useGpu, stream);
-    resizeAndCopy(udp, src.udp, startRow, copySize, useGpu, stream);
     resizeAndCopy(strs, src.strs, startRow, copySize, useGpu, stream);
     return copySize;
   } else {
@@ -268,7 +226,6 @@ int32_t Argument::resizeAndCopyFrom(const Argument& src,
     resizeAndCopy(value, src.value, startRow, copyFeatureSize, useGpu, stream);
     resizeAndCopy(grad, src.grad, startRow, copyFeatureSize, useGpu, stream);
     resizeAndCopy(ids, src.ids, startRow, copyFeatureSize, useGpu, stream);
-    resizeAndCopy(udp, src.udp, startRow, copySize, useGpu, stream);
     resizeAndCopy(sequenceStartPositions,
                   src.sequenceStartPositions,
                   startSeq,
@@ -583,7 +540,7 @@ void Argument::checkSubset() const {
   }
 }

-void Argument::degradeSequence(const Argument& input, bool useGpu) {
+void Argument::degradeSequence(const Argument& input) {
   CHECK_EQ(input.hasSubseq(), 1UL);
   size_t numSequences = input.getNumSequences();
   size_t numSubSequences = input.getNumSubSequences();
......
@@ -24,8 +24,6 @@ limitations under the License. */

 namespace paddle {

-// vector of user defined pointers
-typedef std::shared_ptr<std::vector<void*>> UserDefinedVectorPtr;
 typedef std::shared_ptr<std::vector<std::string>> SVectorPtr;

 struct Argument {
@@ -40,7 +38,6 @@ struct Argument {
         sequenceStartPositions(nullptr),
         subSequenceStartPositions(nullptr),
         cpuSequenceDims(nullptr),
-        udp(nullptr),
         deviceId(-1),
         allCount(0),
         valueCount(0),
@@ -63,7 +60,6 @@
     sequenceStartPositions = argument.sequenceStartPositions;
     subSequenceStartPositions = argument.subSequenceStartPositions;
     cpuSequenceDims = argument.cpuSequenceDims;
-    udp = argument.udp;
     deviceId = argument.deviceId;
     allCount = argument.allCount;
     frameHeight = argument.frameHeight;
@@ -96,8 +92,6 @@
   // dimension of sequence, stored only in CPU
   IVectorPtr cpuSequenceDims;

-  UserDefinedVectorPtr udp;  // user defined pointer
-
   int deviceId;  // the GPU device id which the argument in
   int allCount;  // the number of output layers using this argument
   mutable int valueCount;  // waiting this member when layer do forward
@@ -137,7 +131,6 @@
     if (ids) return ids->getSize();
     if (grad) return grad->getHeight();
     if (in) return in->getHeight();
-    if (udp) return udp->size();
     if (strs) return strs->size();
     return 0;
   }
@@ -296,7 +289,7 @@
   /*
     sequence has sub-sequence degrades to a sequence.
   */
-  void degradeSequence(const Argument& input, bool useGpu);
+  void degradeSequence(const Argument& input);

   /**
    * @brief getValueString will return the argument's output in string. There
......
@@ -109,7 +109,7 @@ This command mounts the source directory on the host into `/paddle` in the conta
 Users can specify the following Docker build arguments with either "ON" or "OFF" value:
 - `WITH_GPU`: ***Required***. Generates NVIDIA CUDA GPU code and relies on CUDA libraries.
 - `WITH_AVX`: ***Required***. Setting it to "OFF" prevents generating AVX instructions. If you don't know what AVX is, you might want to set "ON".
-- `TEST`: ***Optional, default ON***. Build unit tests and run them after building.
+- `TEST`: ***Optional, default OFF***. Build unit tests and run them after building.
 - `BUILD_AND_INSTALL`: ***Optional, default ON***. Run `make` and `make install`.
 - `DELETE_BUILD_CACHE`: ***Optional, default ON***. If set to "ON", the building script will delete, download, and re-build third party libraries.
......
 #!/bin/bash

-function abort(){
-    echo "An error occurred. Exiting..." 1>&2
-    exit 1
-}
-
-trap 'abort' 0
 set -e

-mkdir -p /paddle/dist/cpu
-mkdir -p /paddle/dist/gpu
-mkdir -p /paddle/dist/cpu-noavx
-mkdir -p /paddle/dist/gpu-noavx
-
-# Set BASE_IMAGE and DEB_PATH according to env variables
+# Set BASE_IMAGE according to env variables
 if [ ${WITH_GPU} == "ON" ]; then
   BASE_IMAGE="nvidia/cuda:7.5-cudnn5-runtime-ubuntu14.04"
   # additional packages to install when building gpu images
   GPU_DOCKER_PKG="python-pip python-dev"
-  if [ ${WITH_AVX} == "ON" ]; then
-    DEB_PATH="dist/gpu/"
-  else
-    DEB_PATH="dist/gpu-noavx/"
-  fi
 else
   BASE_IMAGE="python:2.7.13-slim"
-  if [ ${WITH_AVX} == "ON" ]; then
-    DEB_PATH="dist/cpu/"
-  else
-    DEB_PATH="dist/cpu-noavx/"
-  fi
 fi

-# If Dockerfile.* sets BUILD_AND_INSTALL to 'ON', it would have copied
-# source tree to /paddle, and this scripts should build it into
-# /paddle/build.
-if [[ ${BUILD_AND_INSTALL:-ON} == 'ON' ]]; then
-  if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then
-    ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so /usr/lib/libcudnn.so
-  fi
-  mkdir -p /paddle/build # -p means no error if exists
-  cd /paddle/build
-  # clean local cmake and third_party cache
-  if [ ${DELETE_BUILD_CACHE:-ON} == 'ON' ]; then
-    rm -rf * && rm -rf ../third_party
-  fi
-  cmake .. \
-        -DWITH_DOC=${WITH_DOC:-OFF} \
-        -DWITH_GPU=${WITH_GPU:-OFF} \
-        -DWITH_AVX=${WITH_AVX:-OFF} \
-        -DWITH_SWIG_PY=ON \
-        -DCUDNN_ROOT=/usr/ \
-        -DWITH_STYLE_CHECK=OFF \
-        -DON_COVERALLS=${TEST:-ON} \
-        -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
-  make -j `nproc`
-  if [ ${TEST:-ON} == "ON" ]; then
-    make coveralls
-  fi
-  make install
-  # generate deb package for current build
-  # FIXME(typhoonzero): should we remove paddle/scripts/deb ?
-  # FIXME: CPACK_DEBIAN_PACKAGE_DEPENDS removes all dev dependencies, must
-  # install them in docker
-  cpack -D CPACK_GENERATOR='DEB' -D CPACK_DEBIAN_PACKAGE_DEPENDS="" ..
-  mv /paddle/build/*.deb /paddle/${DEB_PATH}
-  if [[ ${BUILD_WOBOQ:-OFF} == 'ON' ]]; then
-    apt-get install -y clang-3.8 llvm-3.8 libclang-3.8-dev
-    # Install woboq_codebrowser.
-    git clone https://github.com/woboq/woboq_codebrowser /woboq
-    cd /woboq
-    cmake -DLLVM_CONFIG_EXECUTABLE=/usr/bin/llvm-config-3.8 \
-          -DCMAKE_BUILD_TYPE=Release \
-          .
-    make
-    export WOBOQ_OUT=/usr/share/nginx/html/paddle
-    export BUILD_DIR=/paddle/build
-    mkdir -p $WOBOQ_OUT
-    cp -rv /woboq/data $WOBOQ_OUT/../data
-    /woboq/generator/codebrowser_generator \
-        -b /paddle/build \
-        -a \
-        -o $WOBOQ_OUT \
-        -p paddle:/paddle
-    /woboq/indexgenerator/codebrowser_indexgenerator $WOBOQ_OUT
-    cd /woboq
-    make clean
-  fi
-  pip install /usr/local/opt/paddle/share/wheels/py_paddle*linux*.whl
-  pip install /usr/local/opt/paddle/share/wheels/paddle*.whl
-  paddle version
-  if [[ ${DOCKER_BUILD:-FALSE} == 'TRUE' ]]; then
-    # reduce docker image size
-    rm -rf /paddle/build
-    rm -rf /usr/local/opt/paddle/share/wheels/
-  fi
-fi
+DOCKERFILE_GPU_ENV=""
+if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then
+    DOCKERFILE_GPU_ENV="ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}"
+fi
+
+mkdir -p /paddle/build
+cd /paddle/build
+
+# build script will not fail if *.deb does not exist
+rm *.deb || true
+
+cmake .. \
+      -DCMAKE_BUILD_TYPE=Release \
+      -DWITH_DOC=${WITH_DOC:-OFF} \
+      -DWITH_GPU=${WITH_GPU:-OFF} \
+      -DWITH_AVX=${WITH_AVX:-OFF} \
+      -DWITH_SWIG_PY=ON \
+      -DCUDNN_ROOT=/usr/ \
+      -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} \
+      -DON_COVERALLS=${TEST:-OFF} \
+      -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
+make -j `nproc`
+if [[ ${TEST:-OFF} == "ON" ]]; then
+  make coveralls
+fi
+make install
+
+# generate deb package for current build
+# FIXME(typhoonzero): should we remove paddle/scripts/deb ?
+# FIXME: CPACK_DEBIAN_PACKAGE_DEPENDS removes all dev dependencies, must
+# install them in docker
+cpack -D CPACK_GENERATOR='DEB' -D CPACK_DEBIAN_PACKAGE_DEPENDS="" ..
+
+if [[ ${BUILD_WOBOQ:-OFF} == 'ON' ]]; then
+  apt-get install -y clang-3.8 llvm-3.8 libclang-3.8-dev
+  # Install woboq_codebrowser.
+  git clone https://github.com/woboq/woboq_codebrowser /woboq
+  cd /woboq
+  cmake -DLLVM_CONFIG_EXECUTABLE=/usr/bin/llvm-config-3.8 \
+        -DCMAKE_BUILD_TYPE=Release \
+        .
+  make
+
+  export WOBOQ_OUT=/usr/share/nginx/html/paddle
+  export BUILD_DIR=/paddle/build
+  mkdir -p $WOBOQ_OUT
+  cp -rv /woboq/data $WOBOQ_OUT/../data
+  /woboq/generator/codebrowser_generator \
+      -b /paddle/build \
+      -a \
+      -o $WOBOQ_OUT \
+      -p paddle:/paddle
+  /woboq/indexgenerator/codebrowser_indexgenerator $WOBOQ_OUT
+  cd /woboq
+  make clean
+fi
+
+paddle version

 # generate production docker image Dockerfile
 if [ ${USE_MIRROR} ]; then
   MIRROR_UPDATE="sed 's@http:\/\/archive.ubuntu.com\/ubuntu\/@mirror:\/\/mirrors.ubuntu.com\/mirrors.txt@' -i /etc/apt/sources.list && \\"
@@ -109,22 +80,9 @@ fi
 cat > /paddle/build/Dockerfile <<EOF
 FROM ${BASE_IMAGE}
 MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
-# ENV variables
-ARG WITH_AVX
-ARG WITH_DOC
-ARG WITH_STYLE_CHECK
-ENV WITH_GPU=${WITH_GPU}
-ENV WITH_AVX=${WITH_AVX}
-ENV WITH_DOC=\${WITH_DOC:-OFF}
-ENV WITH_STYLE_CHECK=\${WITH_STYLE_CHECK:-OFF}
 ENV HOME /root
 ENV LANG en_US.UTF-8
 # Use Fix locales to en_US.UTF-8
 RUN ${MIRROR_UPDATE}
     apt-get update && \
     apt-get install -y libgfortran3 libpython2.7 ${GPU_DOCKER_PKG} && \
@@ -132,13 +90,10 @@ RUN ${MIRROR_UPDATE}
     pip install --upgrade pip && \
     pip install -U 'protobuf==3.1.0' requests numpy
 # Use different deb file when building different type of images
-ADD \$PWD/${DEB_PATH}*.deb /usr/local/opt/paddle/deb/
+ADD build/*.deb /usr/local/opt/paddle/deb/
 # run paddle version to install python packages first
 RUN dpkg -i /usr/local/opt/paddle/deb/*.deb && rm -f /usr/local/opt/paddle/deb/*.deb && paddle version
-ENV PATH="/usr/local/opt/paddle/bin/:${PATH}"
+${DOCKERFILE_GPU_ENV}
 # default command shows the paddle version and exit
 CMD ["paddle", "version"]
 EOF
-
-trap : 0
@@ -12,6 +12,7 @@ limitations under the License. */
 #pragma once

 #include "Common.h"
+#include "Error.h"

 namespace paddle {
@@ -97,4 +98,37 @@ private:
 #define HAS_AVX512 HAS_SIMD(SIMD_AVX512)
 // clang-format on

+/**
+ * Invoke checkCPUFeature() before Paddle initialization to
+ * check whether the target machine supports the compiled instructions.
+ * If not, simply throw out an error.
+ */
+inline Error __must_check checkCPUFeature() {
+  Error err;
+#ifndef __AVX__
+  if (HAS_AVX) {
+    LOG(WARNING) << "PaddlePaddle wasn't compiled to use avx instructions, "
+                 << "but these are available on your machine and could "
+                 << "speed up CPU computations via CMAKE .. -DWITH_AVX=ON";
+  }
+#else
+  if (!HAS_AVX) {
+    err = Error(
+        "PaddlePaddle was compiled to use avx instructions, "
+        "but these aren't available on your machine, please "
+        "disable it via CMAKE .. -DWITH_AVX=OFF");
+  }
+#endif  // __AVX__
+
+#ifdef __SSE3__
+  if (!HAS_SSE3) {
+    err = Error(
+        "PaddlePaddle was compiled to use sse3 instructions, "
+        "which is the minimum requirement of PaddlePaddle. "
+        "But these aren't available on your current machine.");
+  }
+#endif  // __SSE3__
+
+  return err;
+}
+
 }  // namespace paddle
@@ -26,6 +26,7 @@ limitations under the License. */
 #include <gflags/gflags.h>

+#include "CpuId.h"
 #include "CustomStackTrace.h"
 #include "Logging.h"
 #include "StringUtil.h"
@@ -185,6 +186,7 @@ void initMain(int argc, char** argv) {
   }

   version::printVersion();
+  checkCPUFeature().check();
   runInitFunctions();
 }
......
@@ -2222,7 +2222,10 @@ def Link(
 # memory for recurrent layer group.
 # *name* and *size* are actual layer's name and size.
-# will return name of the memory,
+# If *name* is None, need to provide *memory_name* and need to use
+# SetMemoryInput() later to specify the layer which this memory remembers.
+#
+# return the name of the memory,
 # use this name if you assign the memory as other layer's input
 #
 # boot frame of memory is zeroed by default,
@@ -2234,15 +2237,18 @@
 # can only be initialized by a *boot_layer* which is a sequence.
 #
 @config_func
-def Memory(
-        name,
-        size,
-        is_sequence=False,
-        boot_layer=None,
-        boot_bias=False,
-        boot_bias_active_type="",
-        boot_with_const_id=None, ):
-    agent_name = name + "+delay1"
+def Memory(name,
+           size,
+           is_sequence=False,
+           boot_layer=None,
+           boot_bias=False,
+           boot_bias_active_type="",
+           boot_with_const_id=None,
+           memory_name=None):
+    if not memory_name:
+        config_assert(name is not None, "name cannot be None")
+        memory_name = name + "+delay1"
+    agent_name = memory_name
     if is_sequence:
         agent_layer = SequenceAgentLayer(agent_name, size)
     else:
@@ -2250,7 +2256,8 @@
     config_assert(g_current_submodel.is_recurrent_layer_group,
                   'Memory should be used in recurrent layer group only')
     memory = g_current_submodel.memories.add()
-    memory.layer_name = MakeLayerNameInSubmodel(name)
+    if name is not None:
+        memory.layer_name = MakeLayerNameInSubmodel(name)
     memory.link_name = MakeLayerNameInSubmodel(agent_name)
     memory.is_sequence = is_sequence
     options = sum((boot_layer is not None, bool(boot_bias),
@@ -2274,6 +2281,17 @@
     return agent_name

+@config_func
+def SetMemoryInput(memory_name, layer_name):
+    memory_name = MakeLayerNameInSubmodel(memory_name)
+    layer_name = MakeLayerNameInSubmodel(layer_name)
+    for mem in g_current_submodel.memories:
+        if mem.link_name == memory_name:
+            mem.layer_name = layer_name
+            return
+    logger.fatal("Nonexistent memory name: " + memory_name)
+
 # Generator for recurrent layer group, to use it:
 # 1. define a id layer as output of layer group
 # 2. define a memory of this id layer, and assign a boot id(begin of sequence)
......
@@ -97,13 +97,13 @@ def reset_hook():
 register_parse_config_hook(reset_hook)

-def wrap_name_default(name_prefix=None):
+def wrap_name_default(name_prefix=None, name_param="name"):
     """
     Decorator to set "name" arguments default to "{name_prefix}_{invoke_count}".

     ..  code:: python

-        @default_name("some_name")
+        @wrap_name_default("some_name")
         def func(name=None):
             print name  # name will never be None. If name is not set,
                         # name will be "some_name_%d"
@@ -115,7 +115,7 @@ def wrap_name_default(name_prefix=None):
     """
     factory = DefaultNameFactory(name_prefix)
     _name_factories.append(factory)
-    return wrap_param_default(["name"], factory)
+    return wrap_param_default([name_param], factory)

 def wrap_param_attr_default(param_names=None, default_factory=None):
......
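For context, the new `name_param` argument lets the same decorator fill in a default for a keyword other than `name`; the commit uses it below as `@wrap_name_default("memory", "memory_name")`. Here is a minimal self-contained sketch of that behavior, with simplified stand-ins for `DefaultNameFactory` and `wrap_param_default` (not the actual implementations):

```python
import functools

def wrap_name_default(name_prefix, name_param="name"):
    # simplified stand-in: fill kwargs[name_param] with "<prefix>_<count>"
    counter = [0]

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            if kwargs.get(name_param) is None:
                kwargs[name_param] = "%s_%d" % (name_prefix, counter[0])
                counter[0] += 1
            return func(*args, **kwargs)

        return wrapper

    return decorator

@wrap_name_default("memory", name_param="memory_name")
def memory(size, name=None, memory_name=None):
    return memory_name

print(memory(size=256))  # memory_0
print(memory(size=256))  # memory_1
```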
@@ -288,6 +288,14 @@ class LayerOutput(object):
         """
         assert False, "this method should not be invoked"

+    def set_input(self, input):
+        """
+        Set the input for a memory layer. Can only be used for memory layer.
+        """
+        assert isinstance(input, LayerOutput)
+        assert self.layer_type == LayerType.MEMORY
+        SetMemoryInput(self.name, input.name)
+
 ERROR_CLIPPING = 'error_clipping_threshold'
 DROPOUT = 'drop_rate'
@@ -2759,8 +2767,10 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None,
         size=a.size)

+@wrap_name_default("memory", "memory_name")
 def memory(name,
            size,
+           memory_name=None,
            is_seq=False,
            boot_layer=None,
            boot_bias=None,
@@ -2782,14 +2792,32 @@ def memory(name,
     If boot_layer is not null, the memory is just the boot_layer's output.
     Set :code:`is_seq` to true if the boot layer is a sequence.

     The same name layer in recurrent group will set memory on each time
     step.

-    :param name: memory's name.
+    .. code-block:: python
+
+       mem = memory(size=256, name='state')
+       state = fc_layer(input=mem, size=256, name='state')
+
+    If you do not want to specify the name, you can equivalently use set_input()
+    to specify the layer that needs to be remembered as the following:
+
+    .. code-block:: python
+
+       mem = memory(size=256)
+       state = fc_layer(input=mem, size=256)
+       mem.set_input(state)
+
+    :param name: the name of the layer which this memory remembers.
+                 If name is None, user should call set_input() to specify the
+                 name of the layer which this memory remembers.
     :type name: basestring
     :param size: size of memory.
     :type size: int
+    :param memory_name: the name of the memory.
+                        It is ignored when name is provided.
+    :type memory_name: basestring
     :param is_seq: is sequence for boot_layer
     :type is_seq: bool
     :param boot_layer: boot layer of memory.
@@ -2811,13 +2839,21 @@ def memory(name,
         boot_bias = ParamAttr.to_bias(boot_bias)

     assert boot_layer is None or isinstance(boot_layer, LayerOutput)
+    if name is not None:
+        memory_name = None

-    agent_name = Memory(name, size, is_seq, boot_layer.name
-                        if boot_layer is not None else None, boot_bias,
-                        boot_bias_active_type.name, boot_with_const_id)
+    memory_name = Memory(
+        name,
+        size,
+        is_sequence=is_seq,
+        boot_layer=boot_layer.name if boot_layer is not None else None,
+        boot_bias=boot_bias,
+        boot_bias_active_type=boot_bias_active_type.name,
+        boot_with_const_id=boot_with_const_id,
+        memory_name=memory_name)

     lout = LayerOutput(
-        name=agent_name,
+        name=memory_name,
         size=size,
         layer_type=LayerType.MEMORY,
         parents=[boot_layer] if boot_layer is not None else None)
@@ -3565,7 +3601,7 @@ def __cost_input__(input, label, weight=None):
     ipts = [Input(input.name), Input(label.name)]
     parents = [input, label]
     if weight is not None:
-        assert weight.layer_type == LayerType.DATA
+        assert weight.size == 1
         ipts.append(Input(weight.name))
         parents.append(weight)
     return ipts, parents
@@ -4946,7 +4982,12 @@ def lambda_cost(input,
 @wrap_name_default()
 @layer_support()
-def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
+def cross_entropy(input,
+                  label,
+                  name=None,
+                  coeff=1.0,
+                  weight=None,
+                  layer_attr=None):
     """
     A loss layer for multi class entropy.
@@ -4961,22 +5002,27 @@ def cross_entropy(input,
     :type input: LayerOutput.
     :param name: The name of this layers. It is not necessary.
     :type name: None|basestring.
-    :param coeff: The coefficient affects the gradient in the backward.
+    :param coeff: The cost is multiplied with coeff.
+                  The coefficient affects the gradient in the backward.
     :type coeff: float.
+    :param weight: The cost of each sample is multiplied with each weight.
+                   The weight should be a layer with size=1. Note that gradient
+                   will not be calculated for weight.
+    :type weight: LayerOutput
     :param layer_attr: Extra Layer Attribute.
     :type layer_attr: ExtraLayerAttribute
     :return: LayerOutput object.
     :rtype: LayerOutput.
     """
+    ipts, parents = __cost_input__(input, label, weight)

     Layer(
         name=name,
         type=LayerType.CROSS_ENTROPY,
-        inputs=[input.name, label.name],
+        inputs=ipts,
         coeff=coeff,
         **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(
-        name, LayerType.CROSS_ENTROPY, parents=[input, label], size=1)
+    return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1)

 @wrap_name_default()
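Putting the pieces together, here is a hedged sketch of how the new `weight` input to `cross_entropy` could be wired into a trainer config. Layer names and sizes are illustrative, not taken from the commit; the only hard requirement visible above is that the per-sample weight layer has size 1.

```python
from paddle.trainer_config_helpers import *

# illustrative network: a classifier whose per-sample cost is re-weighted
feats = data_layer(name='features', size=128)
lbl = data_layer(name='label', size=10)
w = data_layer(name='weight', size=1)  # one weight per sample

prob = fc_layer(input=feats, size=10, act=SoftmaxActivation())
cost = cross_entropy(input=prob, label=lbl, weight=w)
outputs(cost)
```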
@@ -331,6 +331,54 @@ layers {
   }
   trans_type: "non-seq"
 }
+layers {
+  name: "__recurrent_group_3__"
+  type: "recurrent_layer_group"
+  active_type: ""
+}
+layers {
+  name: "seq_input@__recurrent_group_3__"
+  type: "scatter_agent"
+  size: 100
+  active_type: ""
+}
+layers {
+  name: "__memory_6__@__recurrent_group_3__"
+  type: "agent"
+  size: 200
+  active_type: ""
+}
+layers {
+  name: "__fc_layer_0__@__recurrent_group_3__"
+  type: "fc"
+  size: 200
+  active_type: "tanh"
+  inputs {
+    input_layer_name: "seq_input@__recurrent_group_3__"
+    input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w0"
+  }
+  inputs {
+    input_layer_name: "__memory_6__@__recurrent_group_3__"
+    input_parameter_name: "___fc_layer_0__@__recurrent_group_3__.w1"
+  }
+  bias_parameter_name: "___fc_layer_0__@__recurrent_group_3__.wbias"
+}
+layers {
+  name: "__fc_layer_0__"
+  type: "gather_agent"
+  size: 200
+  active_type: ""
+}
+layers {
+  name: "__last_seq_4__"
+  type: "seqlastins"
+  size: 200
+  active_type: "linear"
+  inputs {
+    input_layer_name: "__fc_layer_0__"
+  }
+  trans_type: "non-seq"
+}
 parameters {
   name: "___mixed_0__.w0"
   size: 40000
@@ -481,6 +529,36 @@ parameters {
   initial_strategy: 0
   initial_smart: false
 }
+parameters {
+  name: "___fc_layer_0__@__recurrent_group_3__.w0"
+  size: 20000
+  initial_mean: 0.0
+  initial_std: 0.1
+  dims: 100
+  dims: 200
+  initial_strategy: 0
+  initial_smart: true
+}
+parameters {
+  name: "___fc_layer_0__@__recurrent_group_3__.w1"
+  size: 40000
+  initial_mean: 0.0
+  initial_std: 0.0707106781187
+  dims: 200
+  dims: 200
+  initial_strategy: 0
+  initial_smart: true
+}
+parameters {
+  name: "___fc_layer_0__@__recurrent_group_3__.wbias"
+  size: 200
+  initial_mean: 0.0
+  initial_std: 0.0
+  dims: 1
+  dims: 200
+  initial_strategy: 0
+  initial_smart: false
+}
 input_layer_names: "seq_input"
 input_layer_names: "sub_seq_input"
 output_layer_names: "__last_seq_0__"
@@ -488,6 +566,7 @@ output_layer_names: "__first_seq_0__"
 output_layer_names: "__last_seq_1__"
 output_layer_names: "__last_seq_2__"
 output_layer_names: "__last_seq_3__"
+output_layer_names: "__last_seq_4__"
 sub_models {
   name: "root"
   layer_names: "seq_input"
@@ -510,6 +589,9 @@ sub_models {
   layer_names: "__gru_group_0___recurrent_group"
   layer_names: "__gru_group_0__"
   layer_names: "__last_seq_3__"
+  layer_names: "__recurrent_group_3__"
+  layer_names: "__fc_layer_0__"
+  layer_names: "__last_seq_4__"
   input_layer_names: "seq_input"
   input_layer_names: "sub_seq_input"
   output_layer_names: "__last_seq_0__"
@@ -517,6 +599,7 @@
   output_layer_names: "__last_seq_1__"
   output_layer_names: "__last_seq_2__"
   output_layer_names: "__last_seq_3__"
+  output_layer_names: "__last_seq_4__"
   is_recurrent_layer_group: false
 }
 sub_models {
@@ -647,4 +730,28 @@ sub_models {
   }
   target_inlinkid: -1
 }
+sub_models {
+  name: "__recurrent_group_3__"
+  layer_names: "seq_input@__recurrent_group_3__"
+  layer_names: "__memory_6__@__recurrent_group_3__"
+  layer_names: "__fc_layer_0__@__recurrent_group_3__"
+  is_recurrent_layer_group: true
+  reversed: false
+  memories {
+    layer_name: "__fc_layer_0__@__recurrent_group_3__"
+    link_name: "__memory_6__@__recurrent_group_3__"
+    is_sequence: false
+  }
+  in_links {
+    layer_name: "seq_input"
+    link_name: "seq_input@__recurrent_group_3__"
+    has_subseq: false
+  }
+  out_links {
+    layer_name: "__fc_layer_0__@__recurrent_group_3__"
+    link_name: "__fc_layer_0__"
+    has_subseq: false
+  }
+  target_inlinkid: -1
+}
@@ -16,6 +16,16 @@ def generate_rnn_simple(name):
     return rnn_simple

+def generate_rnn_simple_no_name():
+    def rnn_simple(s):
+        m = memory(name=None, size=200)
+        fc = fc_layer(input=[s, m], size=200)
+        m.set_input(fc)
+        return fc
+
+    return rnn_simple
+
 with mixed_layer() as lstm_param:  # test lstm unit, rnn group
     lstm_param += full_matrix_projection(input=seq, size=100 * 4)
@@ -33,4 +43,6 @@ outputs(
     last_seq(input=lstmemory_group(
         input=lstm_param, size=100)),
     last_seq(input=gru_group(
-        input=gru_param, size=100)))
+        input=gru_param, size=100)),
+    last_seq(input=recurrent_group(
+        step=generate_rnn_simple_no_name(), input=seq)), )
@@ -22,7 +22,9 @@ import paddle.v2.networks as networks

 pixel = layer.data(name='pixel', type=data_type.dense_vector(128))
 label = layer.data(name='label', type=data_type.integer_value(10))
-weight = layer.data(name='weight', type=data_type.dense_vector(10))
+weight = layer.data(name='weight', type=data_type.dense_vector(1))
+combine_weight = layer.data(
+    name='weight_combine', type=data_type.dense_vector(10))
 score = layer.data(name='score', type=data_type.dense_vector(1))

 hidden = layer.fc(input=pixel,
@@ -81,7 +83,8 @@ class AggregateLayerTest(unittest.TestCase):
 class MathLayerTest(unittest.TestCase):
     def test_math_layer(self):
         addto = layer.addto(input=[pixel, pixel])
-        linear_comb = layer.linear_comb(weights=weight, vectors=hidden, size=10)
+        linear_comb = layer.linear_comb(
+            weights=combine_weight, vectors=hidden, size=10)
         interpolation = layer.interpolation(
             input=[hidden, hidden], weight=score)
         bilinear = layer.bilinear_interp(input=conv, out_size_x=4, out_size_y=4)
......