diff --git a/cmake/FindSphinx.cmake b/cmake/FindSphinx.cmake
index 1c29cb22a31f1e41a6b5575837c6374175cfdea5..f74cd4ff8c9c2c52319b18ac37264167b3718eae 100644
--- a/cmake/FindSphinx.cmake
+++ b/cmake/FindSphinx.cmake
@@ -72,7 +72,7 @@ function( Sphinx_add_target target_name builder conf cache source destination )
     ${source}
     ${destination}
     COMMENT "Generating sphinx documentation: ${builder}"
-    COMMAND cd ${destination} && ln -s ./index_*.html index.html
+    COMMAND cd ${destination} && ln -sf ./index_*.html index.html
     )
 
   set_property(
diff --git a/paddle/gserver/layers/SequencePoolLayer.cpp b/paddle/gserver/layers/SequencePoolLayer.cpp
index 35260ca912d5d0e00213ffb7074bd8963da265da..5807c4249620db44fed82a6bb69a77d807d9f0a0 100644
--- a/paddle/gserver/layers/SequencePoolLayer.cpp
+++ b/paddle/gserver/layers/SequencePoolLayer.cpp
@@ -56,17 +56,16 @@ void SequencePoolLayer::forward(PassType passType) {
   CHECK_EQ(newBatchSize_, starts->getSize() - 1);
 
   resetOutput(newBatchSize_, dim);
-  if (type_) {
-    CHECK(input.subSequenceStartPositions)
-        << "when trans_type = seq, input must hasSubseq";
-  }
+
   /* If type_ = kNonSeq, a sequence with or without sub-sequences degrades to
    * a non-sequence; in this case, output_ has no sequenceStartPositions.
    * If type_ = kSeq, a sequence with sub-sequences degrades to a sequence;
    * only in this case do we compute the new sequenceStartPositions.
    */
   if (type_) {
-    output_.degradeSequence(input, useGpu_);
+    CHECK(input.subSequenceStartPositions)
+        << "when trans_type = seq, input must have sub-sequences";
+    output_.degradeSequence(input);
   }
 }
 
diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp
index 7a343cca33f5b420be6192231ac73ca1c2da5fb9..2f025f729087286274b35cd3b0396a4bd13115d1 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -583,7 +583,7 @@ void Argument::checkSubset() const {
   }
 }
 
-void Argument::degradeSequence(const Argument& input, bool useGpu) {
+void Argument::degradeSequence(const Argument& input) {
   CHECK_EQ(input.hasSubseq(), 1UL);
   size_t numSequences = input.getNumSequences();
   size_t numSubSequences = input.getNumSubSequences();
diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h
index 9ef44be0cb3b960db1e789f3f26bb66d1fe63c81..129b7c4f8bdcf566845887ad2b4638ea944f915a 100644
--- a/paddle/parameter/Argument.h
+++ b/paddle/parameter/Argument.h
@@ -296,7 +296,7 @@ struct Argument {
   /*
    A sequence with sub-sequences degrades to a sequence.
    */
-  void degradeSequence(const Argument& input, bool useGpu);
+  void degradeSequence(const Argument& input);
 
   /**
    * @brief getValueString will return the argument's output in string. There
diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/v2/dataset/wmt14.py
index f5a16d51477f9cfbf0cd32af54098406fbbd2b41..c686870a497668517d1c78c11c616ad8a71a2980 100644
--- a/python/paddle/v2/dataset/wmt14.py
+++ b/python/paddle/v2/dataset/wmt14.py
@@ -23,7 +23,7 @@ __all__ = ['train', 'test', 'build_dict']
 URL_DEV_TEST = 'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz'
 MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5'
 # This is a small dataset for testing. The original data is too large and will be added later.
-URL_TRAIN = 'http://paddlepaddle.bj.bcebos.com/demo/wmt_shrinked_data/wmt14.tgz'
+URL_TRAIN = 'http://paddlepaddle.cdn.bcebos.com/demo/wmt_shrinked_data/wmt14.tgz'
 MD5_TRAIN = 'a755315dd01c2c35bde29a744ede23a6'
 
 START = "<s>"
diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py
index 5ccd3d6913e1755a37b4da7c4f182147b880d3cb..89cc928dd7f624612ba717b4e5c2d6c2de7f8bed 100644
--- a/python/paddle/v2/tests/test_layer.py
+++ b/python/paddle/v2/tests/test_layer.py
@@ -22,7 +22,9 @@ import paddle.v2.networks as networks
 
 pixel = layer.data(name='pixel', type=data_type.dense_vector(128))
 label = layer.data(name='label', type=data_type.integer_value(10))
-weight = layer.data(name='weight', type=data_type.dense_vector(10))
+weight = layer.data(name='weight', type=data_type.dense_vector(1))
+combine_weight = layer.data(
+    name='weight_combine', type=data_type.dense_vector(10))
 score = layer.data(name='score', type=data_type.dense_vector(1))
 
 hidden = layer.fc(input=pixel,
@@ -81,7 +83,8 @@ class AggregateLayerTest(unittest.TestCase):
 class MathLayerTest(unittest.TestCase):
     def test_math_layer(self):
         addto = layer.addto(input=[pixel, pixel])
-        linear_comb = layer.linear_comb(weights=weight, vectors=hidden, size=10)
+        linear_comb = layer.linear_comb(
+            weights=combine_weight, vectors=hidden, size=10)
         interpolation = layer.interpolation(
             input=[hidden, hidden], weight=score)
         bilinear = layer.bilinear_interp(input=conv, out_size_x=4, out_size_y=4)