Merge branch 'develop' of https://github.com/paddlepaddle/paddle into reformat-paddle-operators

b8ff8275 · Yi Wang · 9620df44 · dc21a58b · b8ff8275 · b8ff8275
4 changed file
--- a/paddle/trainer/tests/compare_sparse_data
+++ b/paddle/trainer/tests/compare_sparse_data
--- a/paddle/trainer/tests/sample_trainer_config_compare_sparse.conf
+++ b/paddle/trainer/tests/sample_trainer_config_compare_sparse.conf
+#edit-mode: -*- python -*-
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later.
+# Note: when making change to this file, please make sure
+# sample_trainer_config_rnn.conf is changed accordingly so that the uniitest
+# for comparing these two nets can pass (test_CompareTwoNets)
+default_initial_std(0.1)
+default_device(0)
+word_dim = 999
+l1 = 0
+l2 = 0
+model_type("nn")
+sparse_update = get_config_arg("sparse_update", bool, False)
+TrainData(ProtoData(        
+            type = "proto_sequence",
+            files = ('trainer/tests/train_sparse.list'), 
+            ))
+Settings(
+    algorithm='sgd',
+    batch_size=100,
+    learning_rate=0.0001,
+    learning_rate_decay_a=4e-08,
+    learning_rate_decay_b=0.0,
+    learning_rate_schedule='poly',
+)
+wordvec_dim = 32
+layer2_dim = 16
+layer3_dim = 16
+hidden_dim = 32
+slot_names = ["qb", "qw", "tb", "tw"]
+def ltr_network(network_name,
+                word_dim=word_dim,
+                wordvec_dim=wordvec_dim,
+                layer2_dim=layer2_dim,
+                layer3_dim=layer3_dim,
+                hidden_dim=hidden_dim,
+                slot_names=slot_names,
+                l1=l1,
+                l2=l2):
+    slotnum = len(slot_names)
+    for i in xrange(slotnum):
+        Inputs(slot_names[i] + network_name)
+    for i in xrange(slotnum):
+        Layer(
+            name = slot_names[i] + network_name,
+            type = "data",
+            size = word_dim,
+            device = -1,
+        )
+        Layer(
+            name = slot_names[i] + "_embedding_" + network_name,
+            type = "mixed",
+            size = wordvec_dim,
+            bias = False,
+            device = -1,
+            inputs = TableProjection(slot_names[i] + network_name,
+                                     parameter_name = "embedding.w0",
+                                     decay_rate_l1=l1,
+                                     sparse_remote_update = True,
+                                     sparse_update = sparse_update,
+                                     ),
+        )
+        Layer(
+            name = slot_names[i] + "_rnn1_" + network_name,
+            type = "recurrent",
+            active_type = "tanh",
+            bias = Bias(initial_std = 0,
+                        parameter_name = "rnn1.bias"),
+            inputs = Input(slot_names[i] + "_embedding_" + network_name,
+                           parameter_name = "rnn1.w0")
+        )
+        Layer(
+            name = slot_names[i] + "_rnnlast_" + network_name,
+            type = "seqlastins",
+            inputs = [
+                slot_names[i] + "_rnn1_" + network_name,
+            ],
+        )
+    Layer(
+        name = "layer2_" + network_name,
+        type = "fc",
+        active_type = "tanh",
+        size = layer2_dim,
+        bias = Bias(parameter_name = "layer2.bias"),
+        inputs = [Input(slot_name + "_rnnlast_" + network_name, 
+                        parameter_name = "_layer2_" + slot_name + ".w", 
+                        decay_rate = l2, 
+                        initial_smart = True) for slot_name in slot_names]
+    )
+    Layer(
+        name = "layer3_" + network_name,
+        type = "fc",
+        active_type = "tanh",
+        size = layer3_dim,
+        bias = Bias(parameter_name = "layer3.bias"),
+        inputs = [
+            Input("layer2_" + network_name, 
+                  parameter_name = "_layer3.w", 
+                  decay_rate = l2, 
+                  initial_smart = True),
+        ]
+    )
+    Layer(
+        name = "output_" + network_name,
+        type = "fc",
+        size = 1,
+        bias = False,
+        inputs = [
+                  Input("layer3_" + network_name,
+                       parameter_name = "_layerO.w"),
+                 ],
+        )
+ltr_network("left")
+ltr_network("right")
+Inputs("label")
+Layer(
+    name = "label",
+    type = "data",
+    size = 1,
+    )
+Outputs("cost", "qb_rnnlast_left")
+Layer(
+    name = "cost",
+    type = "rank-cost",
+    inputs = ["output_left", "output_right", "label"],
+    )
--- a/paddle/trainer/tests/test_CompareSparse.cpp
+++ b/paddle/trainer/tests/test_CompareSparse.cpp
@@ -23,7 +23,7 @@ using namespace paddle;  // NOLINT
 using namespace std;     // NOLINT
 static const string& configFile1 =
-    "trainer/tests/sample_trainer_config_qb_rnn.conf";
+    "trainer/tests/sample_trainer_config_compare_sparse.conf";
 DECLARE_bool(use_gpu);
 DECLARE_string(config);

--- a/paddle/trainer/tests/train_sparse.list
+++ b/paddle/trainer/tests/train_sparse.list
+trainer/tests/compare_sparse_data