diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt
index 45edef017e0c9599bf374fb1d997ea5a46e9d486..09e1b949c25f2a0d7909defae5f751541a1335ca 100644
--- a/paddle/gserver/tests/CMakeLists.txt
+++ b/paddle/gserver/tests/CMakeLists.txt
@@ -111,3 +111,12 @@ if(NOT ON_TRAVIS)
     ${CMAKE_CURRENT_BINARY_DIR}/test_CompareSparse
     WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
 endif()
+
+################ test_CompareTwoNets ######################
+add_unittest_without_exec(test_CompareTwoNets
+    test_CompareTwoNets.cpp)
+add_test(NAME test_CompareTwoNets
+  COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d
+      ${PADDLE_SOURCE_DIR}/python:${PADDLE_SOURCE_DIR}/paddle/gserver/tests
+      ${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoNets
+    WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
diff --git a/paddle/gserver/tests/sequence_recurrent.py b/paddle/gserver/tests/sequence_recurrent.py
new file mode 100644
index 0000000000000000000000000000000000000000..4895df186bfecc5cb5263676a9cd5bac5039d565
--- /dev/null
+++ b/paddle/gserver/tests/sequence_recurrent.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
+######################## data source ################################
+dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict'
+dict_file = dict()
+for line_count, line in enumerate(open(dict_path, "r")):
+    dict_file[line.strip()] = line_count
+
+define_py_data_sources2(
+    train_list='gserver/tests/Sequence/train.list',
+    test_list=None,
+    module='sequenceGen',
+    obj='process',
+    args={"dict_file": dict_file})
+
+settings(batch_size=5)
+######################## network configure ################################
+dict_dim = len(open(dict_path, 'r').readlines())
+word_dim = 128
+hidden_dim = 128
+label_dim = 3
+
+# This config is designed to be equivalent to sequence_recurrent_group.py
+
+data = data_layer(name="word", size=dict_dim)
+
+emb = embedding_layer(
+    input=data, size=word_dim, param_attr=ParamAttr(name="emb"))
+
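+# NOTE: sequence_recurrent_group.py reuses the recurrent weight created by
+# the recurrent_layer below via the (assumed auto-generated) parameter name
+# "___recurrent_layer_0__", so both configs declare identical parameters.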
+recurrent = recurrent_layer(input=emb, bias_attr=False, act=SoftmaxActivation())
+
+recurrent_last = last_seq(input=recurrent)
+
+with mixed_layer(
+        size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output:
+    output += full_matrix_projection(input=recurrent_last)
+
+outputs(
+    classification_cost(
+        input=output, label=data_layer(
+            name="label", size=1)))
diff --git a/paddle/gserver/tests/sequence_recurrent_group.py b/paddle/gserver/tests/sequence_recurrent_group.py
new file mode 100644
index 0000000000000000000000000000000000000000..a1d54542e3bc4e89f70d31d5e89c0f44953c9f90
--- /dev/null
+++ b/paddle/gserver/tests/sequence_recurrent_group.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
+######################## data source ################################
+dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict'
+dict_file = dict()
+for line_count, line in enumerate(open(dict_path, "r")):
+    dict_file[line.strip()] = line_count
+
+define_py_data_sources2(
+    train_list='gserver/tests/Sequence/train.list',
+    test_list=None,
+    module='sequenceGen',
+    obj='process',
+    args={"dict_file": dict_file})
+
+settings(batch_size=5)
+######################## network configure ################################
+dict_dim = len(open(dict_path, 'r').readlines())
+word_dim = 128
+hidden_dim = 128
+label_dim = 3
+
+# This config is designed to be equivalent to sequence_recurrent.py
+
+data = data_layer(name="word", size=dict_dim)
+
+emb = embedding_layer(
+    input=data, size=word_dim, param_attr=ParamAttr(name="emb"))
+
+
+def step(y):
+    mem = memory(name="rnn_state", size=hidden_dim)
+    with mixed_layer(
+            name="rnn_state",
+            size=hidden_dim,
+            bias_attr=False,
+            act=SoftmaxActivation()) as out:
+        out += identity_projection(input=y)
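+        # "___recurrent_layer_0__" is assumed to match the parameter name
+        # generated for the recurrent weight in sequence_recurrent.py;
+        # reusing it keeps the two equivalent networks comparable.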
+        out += full_matrix_projection(
+            input=mem, param_attr=ParamAttr(name="___recurrent_layer_0__"))
+    return out
+
+
+recurrent = recurrent_group(name="rnn", step=step, input=emb)
+
+recurrent_last = last_seq(input=recurrent)
+
+with mixed_layer(
+        size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output:
+    output += full_matrix_projection(input=recurrent_last)
+
+outputs(
+    classification_cost(
+        input=output, label=data_layer(
+            name="label", size=1)))
diff --git a/paddle/trainer/tests/test_CompareTwoNets.cpp b/paddle/gserver/tests/test_CompareTwoNets.cpp
similarity index 95%
rename from paddle/trainer/tests/test_CompareTwoNets.cpp
rename to paddle/gserver/tests/test_CompareTwoNets.cpp
index 94f65e545d116c802fb4877dc14f07aaaf83a4fb..801d9607565910b1f7f68a9c4532de5877e44f30 100644
--- a/paddle/trainer/tests/test_CompareTwoNets.cpp
+++ b/paddle/gserver/tests/test_CompareTwoNets.cpp
@@ -30,8 +30,6 @@ DECLARE_bool(use_gpu);
 DECLARE_string(config);
 DECLARE_string(nics);
 
-DEFINE_string(config_file_a, "", "config of one network to compare");
-DEFINE_string(config_file_b, "", "config of another network to compare");
 DEFINE_bool(need_high_accuracy,
             false,
             "whether need to run in double accuracy");
@@ -42,6 +40,10 @@ DEFINE_double(
 DECLARE_bool(thread_local_rand_use_global_seed);
 DECLARE_int32(seed);
 
+static const string& config_file_a = "gserver/tests/sequence_recurrent.py";
+static const string& config_file_b =
+    "gserver/tests/sequence_recurrent_group.py";
+
 struct ComData {
   vector<Argument> outArgs;
   vector<ParameterPtr> parameters;
@@ -66,6 +68,7 @@ void calcGradient(ComData& data, const string configFile) {
   DataBatch dataBatch;
   int32_t batchSize = trainer.getConfig().opt_config().batch_size();
 
+  trainer.getDataProvider()->reset();
   trainer.getDataProvider()->setSkipShuffle();
   trainer.getDataProvider()->getNextBatch(batchSize, &dataBatch);
 
@@ -167,11 +170,11 @@ void compareGradient(ComData& comDataA, ComData& comDataB) {
 
 TEST(Trainer, create) {
   ComData dataA;
-  calcGradient(dataA, FLAGS_config_file_a);
+  calcGradient(dataA, config_file_a);
   LOG(INFO) << "\n\nforwardBackward of Network A is finished\n\n";
 
   ComData dataB;
-  calcGradient(dataB, FLAGS_config_file_b);
+  calcGradient(dataB, config_file_b);
   LOG(INFO) << "\n\nforwardBackward of the Network B is finished\n\n";
 
   compareGradient(dataA, dataB);
diff --git a/paddle/trainer/tests/CMakeLists.txt b/paddle/trainer/tests/CMakeLists.txt
index 441df2b57b7729446aeb1fd9ccbd4dad5075f277..3168f3c0ff016bb515d8bb50370d91bf25cbc5da 100644
--- a/paddle/trainer/tests/CMakeLists.txt
+++ b/paddle/trainer/tests/CMakeLists.txt
@@ -28,14 +28,6 @@ if(WITH_PYTHON)
     ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port
     ${CMAKE_CURRENT_BINARY_DIR}/test_TrainerOnePass
     WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
 endif()
-################ test_CompareTwoNets ######################
-add_unittest_without_exec(test_CompareTwoNets
-    test_CompareTwoNets.cpp)
-add_test(NAME test_CompareTwoNets
-  COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
-      ${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoNets
-      --config_file_a=trainer/tests/sample_trainer_config_qb_rnn.conf --config_file_b=trainer/tests/sample_trainer_config_rnn.conf
-    WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
 
 ############### test_CompareTwoOpts ###################
 add_unittest_without_exec(test_CompareTwoOpts
diff --git a/paddle/trainer/tests/sample_trainer_config_qb_rnn.conf b/paddle/trainer/tests/sample_trainer_config_qb_rnn.conf
deleted file mode 100644
index d19222360c2f424ddb306b155dfef07921098a6b..0000000000000000000000000000000000000000
--- a/paddle/trainer/tests/sample_trainer_config_qb_rnn.conf
+++ /dev/null
@@ -1,154 +0,0 @@
-#edit-mode: -*- python -*-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later.
-
-# Note: when making change to this file, please make sure
-# sample_trainer_config_rnn.conf is changed accordingly so that the uniitest
-# for comparing these two nets can pass (test_CompareTwoNets)
-
-default_initial_std(0.1)
-default_device(0)
-
-word_dim = 1451594
-l1 = 0
-l2 = 0
-
-model_type("nn")
-
-sparse_update = get_config_arg("sparse_update", bool, False)
-
-TrainData(ProtoData(
-    type = "proto_sequence",
-    files = ('trainer/tests/train.list'),
-))
-
-Settings(
-    algorithm='sgd',
-    batch_size=100,
-    learning_rate=0.0001,
-    learning_rate_decay_a=4e-08,
-    learning_rate_decay_b=0.0,
-    learning_rate_schedule='poly',
-)
-
-
-wordvec_dim = 128
-layer2_dim = 96
-layer3_dim = 96
-hidden_dim = 128
-
-slot_names = ["qb", "qw", "tb", "tw"]
-
-def ltr_network(network_name,
-                word_dim=word_dim,
-                wordvec_dim=wordvec_dim,
-                layer2_dim=layer2_dim,
-                layer3_dim=layer3_dim,
-                hidden_dim=hidden_dim,
-                slot_names=slot_names,
-                l1=l1,
-                l2=l2):
-
-    slotnum = len(slot_names)
-    for i in xrange(slotnum):
-        Inputs(slot_names[i] + network_name)
-    for i in xrange(slotnum):
-        Layer(
-            name = slot_names[i] + network_name,
-            type = "data",
-            size = word_dim,
-            device = -1,
-        )
-        Layer(
-            name = slot_names[i] + "_embedding_" + network_name,
-            type = "mixed",
-            size = wordvec_dim,
-            bias = False,
-            device = -1,
-            inputs = TableProjection(slot_names[i] + network_name,
-                                     parameter_name = "embedding.w0",
-                                     decay_rate_l1=l1,
-                                     sparse_remote_update = True,
-                                     sparse_update = sparse_update,
-                                     ),
-        )
-        Layer(
-            name = slot_names[i] + "_rnn1_" + network_name,
-            type = "recurrent",
-            active_type = "tanh",
-            bias = Bias(initial_std = 0,
-                        parameter_name = "rnn1.bias"),
-            inputs = Input(slot_names[i] + "_embedding_" + network_name,
-                           parameter_name = "rnn1.w0")
-        )
-        Layer(
-            name = slot_names[i] + "_rnnlast_" + network_name,
-            type = "seqlastins",
-            inputs = [
-                slot_names[i] + "_rnn1_" + network_name,
-            ],
-        )
-
-    Layer(
-        name = "layer2_" + network_name,
-        type = "fc",
-        active_type = "tanh",
-        size = layer2_dim,
-        bias = Bias(parameter_name = "layer2.bias"),
-        inputs = [Input(slot_name + "_rnnlast_" + network_name,
-                        parameter_name = "_layer2_" + slot_name + ".w",
-                        decay_rate = l2,
-                        initial_smart = True) for slot_name in slot_names]
-    )
-    Layer(
-        name = "layer3_" + network_name,
-        type = "fc",
-        active_type = "tanh",
-        size = layer3_dim,
-        bias = Bias(parameter_name = "layer3.bias"),
-        inputs = [
-            Input("layer2_" + network_name,
-                  parameter_name = "_layer3.w",
-                  decay_rate = l2,
-                  initial_smart = True),
-        ]
-    )
-    Layer(
-        name = "output_" + network_name,
-        type = "fc",
-        size = 1,
-        bias = False,
-        inputs = [
-            Input("layer3_" + network_name,
-                  parameter_name = "_layerO.w"),
-        ],
-    )
-
-
-ltr_network("left")
-ltr_network("right")
-Inputs("label")
-Layer(
-    name = "label",
-    type = "data",
-    size = 1,
-    )
-Outputs("cost", "qb_rnnlast_left")
-Layer(
-    name = "cost",
-    type = "rank-cost",
-    inputs = ["output_left", "output_right", "label"],
-    )
diff --git a/paddle/trainer/tests/sample_trainer_config_rnn.conf b/paddle/trainer/tests/sample_trainer_config_rnn.conf
deleted file mode 100644
index b720d4d5a6ca59e207832a8c5410c2cb6074c439..0000000000000000000000000000000000000000
--- a/paddle/trainer/tests/sample_trainer_config_rnn.conf
+++ /dev/null
@@ -1,180 +0,0 @@
-#edit-mode: -*- python -*-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later.
-
-# Note: when making change to this file, please make sure
-# sample_trainer_config_qb_rnn.conf is changed accordingly so that the uniitest
-# for comparing these two nets can pass (test_CompareTwoNets)
-
-default_initial_std(0.1)
-default_device(0)
-
-word_dim = 1451594
-l1 = 0
-l2 = 0
-
-model_type("recurrent_nn")
-
-sparse_update = get_config_arg("sparse_update", bool, False)
-
-TrainData(ProtoData(
-    type = "proto_sequence",
-    files = ('trainer/tests/train.list'),
-))
-
-Settings(
-    algorithm='sgd',
-    batch_size=100,
-    learning_rate=0.0001,
-    learning_rate_decay_a=4e-08,
-    learning_rate_decay_b=0.0,
-    learning_rate_schedule='poly',
-)
-
-
-wordvec_dim = 128
-layer2_dim = 96
-layer3_dim = 96
-hidden_dim = 128
-
-slot_names = ["qb", "qw", "tb", "tw"]
-
-def SimpleRecurrentLayer(name,
-                         size,
-                         active_type,
-                         bias,
-                         input_layer_name,
-                         parameter_name,
-                         seq_reversed = False):
-    RecurrentLayerGroupBegin(name + "_layer_group",
-                             in_links=[input_layer_name],
-                             out_links=[name],
-                             seq_reversed=seq_reversed)
-    memory_name = Memory(name=name, size=size)
-    Layer(
-        name = name,
-        type = "mixed",
-        size = size,
-        active_type = active_type,
-        bias = bias,
-        inputs = [IdentityProjection(input_layer_name),
-                  FullMatrixProjection(memory_name,
-                                       parameter_name = parameter_name,
-                                       ),
-                 ]
-    )
-    RecurrentLayerGroupEnd(name + "_layer_group")
-
-
-def ltr_network(network_name,
-                word_dim=word_dim,
-                wordvec_dim=wordvec_dim,
-                layer2_dim=layer2_dim,
-                layer3_dim=layer3_dim,
-                hidden_dim=hidden_dim,
-                slot_names=slot_names,
-                l1=l1,
-                l2=l2):
-
-    slotnum = len(slot_names)
-    for i in xrange(slotnum):
-        Inputs(slot_names[i] + network_name)
-    for i in xrange(slotnum):
-        Layer(
-            name = slot_names[i] + network_name,
-            type = "data",
-            size = word_dim,
-            device = -1,
-        )
-        Layer(
-            name = slot_names[i] + "_embedding_" + network_name,
-            type = "mixed",
-            size = wordvec_dim,
-            bias = False,
-            device = -1,
-            inputs = TableProjection(slot_names[i] + network_name,
-                                     parameter_name = "embedding.w0",
-                                     decay_rate_l1=l1,
-                                     sparse_remote_update = True,
-                                     sparse_update = sparse_update,
-                                     ),
-        )
-        SimpleRecurrentLayer(
-            name = slot_names[i] + "_rnn1_" + network_name,
-            size = hidden_dim,
-            active_type = "tanh",
-            bias = Bias(initial_std = 0,
-                        parameter_name = "rnn1.bias"),
-            input_layer_name = slot_names[i] + "_embedding_" + network_name,
-            parameter_name = "rnn1.w0",
-        )
-        Layer(
-            name = slot_names[i] + "_rnnlast_" + network_name,
-            type = "seqlastins",
-            inputs = [
-                slot_names[i] + "_rnn1_" + network_name,
-            ],
-        )
-    Layer(
-        name = "layer2_" + network_name,
-        type = "fc",
-        active_type = "tanh",
-        size = layer2_dim,
-        bias = Bias(parameter_name = "layer2.bias"),
-        inputs = [Input(slot_name + "_rnnlast_" + network_name,
-                        parameter_name = "_layer2_" + slot_name + ".w",
-                        decay_rate = l2,
-                        initial_smart = True) for slot_name in slot_names]
-    )
-    Layer(
-        name = "layer3_" + network_name,
-        type = "fc",
-        active_type = "tanh",
-        size = layer3_dim,
-        bias = Bias(parameter_name = "layer3.bias"),
-        inputs = [
-            Input("layer2_" + network_name,
-                  parameter_name = "_layer3.w",
-                  decay_rate = l2,
-                  initial_smart = True),
-        ]
-    )
-    Layer(
-        name = "output_" + network_name,
-        type = "fc",
-        size = 1,
-        bias = False,
-        inputs = [
-            Input("layer3_" + network_name,
-                  parameter_name = "_layerO.w"),
-        ],
-    )
-
-
-ltr_network("left")
-ltr_network("right")
-Inputs("label")
-Layer(
-    name = "label",
-    type = "data",
-    size = 1,
-    )
-Outputs("cost", "qb_rnnlast_left")
-Layer(
-    name = "cost",
-    type = "rank-cost",
-    inputs = ["output_left", "output_right", "label"],
-    )