提交 dc21a58b 编写于 作者: Y Yi Wang 提交者: GitHub

Merge pull request #3196 from helinwang/test

Make test_CompareSparse eat less memory and faster.
#edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later.
# Note: when making change to this file, please make sure
# sample_trainer_config_rnn.conf is changed accordingly so that the uniitest
# for comparing these two nets can pass (test_CompareTwoNets)
default_initial_std(0.1)
default_device(0)
word_dim = 999
l1 = 0
l2 = 0
model_type("nn")
sparse_update = get_config_arg("sparse_update", bool, False)
TrainData(ProtoData(
type = "proto_sequence",
files = ('trainer/tests/train_sparse.list'),
))
Settings(
algorithm='sgd',
batch_size=100,
learning_rate=0.0001,
learning_rate_decay_a=4e-08,
learning_rate_decay_b=0.0,
learning_rate_schedule='poly',
)
wordvec_dim = 32
layer2_dim = 16
layer3_dim = 16
hidden_dim = 32
slot_names = ["qb", "qw", "tb", "tw"]
def ltr_network(network_name,
word_dim=word_dim,
wordvec_dim=wordvec_dim,
layer2_dim=layer2_dim,
layer3_dim=layer3_dim,
hidden_dim=hidden_dim,
slot_names=slot_names,
l1=l1,
l2=l2):
slotnum = len(slot_names)
for i in xrange(slotnum):
Inputs(slot_names[i] + network_name)
for i in xrange(slotnum):
Layer(
name = slot_names[i] + network_name,
type = "data",
size = word_dim,
device = -1,
)
Layer(
name = slot_names[i] + "_embedding_" + network_name,
type = "mixed",
size = wordvec_dim,
bias = False,
device = -1,
inputs = TableProjection(slot_names[i] + network_name,
parameter_name = "embedding.w0",
decay_rate_l1=l1,
sparse_remote_update = True,
sparse_update = sparse_update,
),
)
Layer(
name = slot_names[i] + "_rnn1_" + network_name,
type = "recurrent",
active_type = "tanh",
bias = Bias(initial_std = 0,
parameter_name = "rnn1.bias"),
inputs = Input(slot_names[i] + "_embedding_" + network_name,
parameter_name = "rnn1.w0")
)
Layer(
name = slot_names[i] + "_rnnlast_" + network_name,
type = "seqlastins",
inputs = [
slot_names[i] + "_rnn1_" + network_name,
],
)
Layer(
name = "layer2_" + network_name,
type = "fc",
active_type = "tanh",
size = layer2_dim,
bias = Bias(parameter_name = "layer2.bias"),
inputs = [Input(slot_name + "_rnnlast_" + network_name,
parameter_name = "_layer2_" + slot_name + ".w",
decay_rate = l2,
initial_smart = True) for slot_name in slot_names]
)
Layer(
name = "layer3_" + network_name,
type = "fc",
active_type = "tanh",
size = layer3_dim,
bias = Bias(parameter_name = "layer3.bias"),
inputs = [
Input("layer2_" + network_name,
parameter_name = "_layer3.w",
decay_rate = l2,
initial_smart = True),
]
)
Layer(
name = "output_" + network_name,
type = "fc",
size = 1,
bias = False,
inputs = [
Input("layer3_" + network_name,
parameter_name = "_layerO.w"),
],
)
ltr_network("left")
ltr_network("right")
Inputs("label")
Layer(
name = "label",
type = "data",
size = 1,
)
Outputs("cost", "qb_rnnlast_left")
Layer(
name = "cost",
type = "rank-cost",
inputs = ["output_left", "output_right", "label"],
)
......@@ -23,7 +23,7 @@ using namespace paddle; // NOLINT
using namespace std; // NOLINT
static const string& configFile1 =
"trainer/tests/sample_trainer_config_qb_rnn.conf";
"trainer/tests/sample_trainer_config_compare_sparse.conf";
DECLARE_bool(use_gpu);
DECLARE_string(config);
......
trainer/tests/compare_sparse_data
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册