未验证 提交 ba868854 编写于 作者: T Tao Luo 提交者: GitHub

Merge pull request #5345 from luotao1/ProtoDataProvider

remove unused ProtoDataProvider related code
......@@ -73,7 +73,6 @@ if(MOBILE_INFERENCE)
list(REMOVE_ITEM GSERVER_SOURCES
dataproviders/DataProvider.cpp
dataproviders/MultiDataProvider.cpp
dataproviders/ProtoDataProvider.cpp
dataproviders/PyDataProvider2.cpp
dataproviders/PyDataProvider.cpp)
......
......@@ -16,8 +16,8 @@ limitations under the License. */
#include <unistd.h>
#include <algorithm>
#include "ProtoDataProvider.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
#include "paddle/utils/StringUtil.h"
#include "paddle/utils/Util.h"
......@@ -164,8 +164,6 @@ DataProvider* DataProvider::create(const DataConfig& config,
REGISTER_DATA_PROVIDER(simple, SimpleDataProvider);
REGISTER_DATA_PROVIDER(dummy, DummyDataProvider);
REGISTER_DATA_PROVIDER(proto, ProtoDataProvider);
REGISTER_DATA_PROVIDER(proto_sequence, ProtoSequenceDataProvider);
int64_t DataProvider::getNextBatch(int64_t size, DataBatch* batch) {
int64_t batchSize = doubleBuffer_ ? getNextBatchFromBuffer(size, batch)
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "DataFormat.pb.h"
#include "paddle/utils/Stat.h"
#include "DataProvider.h"
#include "ProtoReader.h"
namespace paddle {
/**
* @brief Data provider that reads samples from protobuf data files, where
* each sample is specified by a proto message.
*
* DataSample is defined in DataFormat.proto.
*
* The file format is
*
* header
*
* sample1
*
* sample2
*
* ...
*
* sampleN
*
* @note In the data file, each message is prefixed with its length.
* Reading and writing of the protobuf messages is implemented in
* ProtoReader.h.
*/
class ProtoDataProvider : public DataProvider {
public:
/**
* @param[in] config data configuration for this provider
* @param[in] useGpu NOTE(review): presumably selects whether batches are
* prepared for the GPU; confirm in ProtoDataProvider.cpp
* @param[in] loadDataAll defaults to true. NOTE(review): presumably controls
* whether the data is loaded at construction time;
* confirm in ProtoDataProvider.cpp
*/
ProtoDataProvider(const DataConfig& config,
bool useGpu,
bool loadDataAll = true);
virtual void reset();
/**
* @brief Number of samples this provider reports.
* @return sampleNums_, scaled by usageRatio_ (and truncated to an integer)
* when usageRatio_ is below 1. usageRatio_ is not declared in this
* class; presumably it is inherited from DataProvider.
* @note this size includes the sequences which are skipped because they
* are longer than the batch size.
*/
virtual int64_t getSize() {
int64_t size = sampleNums_;
if (usageRatio_ < 1.0f) {
size = static_cast<int64_t>(size * usageRatio_);
}
return size;
}
virtual void shuffle();
/**
* @brief load protobuf data from the given data files
* @param[in] fileList names of the data files
*/
void loadData(const std::vector<std::string>& fileList);
virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch);
protected:
/**
* @brief load protobuf data from a list of files
* @param[in] fileName name of a file which contains
* a list of file names
*/
void loadData(const std::string& fileName);
/**
* @brief load protobuf data from file
* @param[in] fileName data file name
*/
void loadDataFile(const std::string& fileName);
/** @brief check the data header read from a data file
* @param[in] header data header read from protobuf data
*/
void checkDataHeader(const DataHeader& header);
/**
* @brief fill protobuf data into slots_,
* slots_ is a vector of ProtoSlot in memory.
* @param[in] sample data sample read from protobuf data
*/
void fillSlots(const DataSample& sample);
/**
* @brief return true if each sample is one sequence, i.e., independent
* of other samples. This is the case exactly when
* sequenceStartPositions_ is empty.
*/
inline bool iidData() const { return sequenceStartPositions_.empty(); }
/**
* @brief check that sample is consistent with header_
*/
void checkSample(const DataSample& sample);
// Iteration helpers taking a callable `op` and a size; their definitions
// are not part of this header.
// NOTE(review): presumably they apply `op` over sequences, samples and the
// sub-samples of one slot respectively, bounded by `size` -- confirm in
// ProtoDataProvider.cpp.
template <class Op>
int64_t sequenceLoop(Op op, int64_t size);
template <class Op>
int64_t sampleLoop(Op op, int64_t size);
template <class Op>
int64_t subSampleLoop(Op op, int64_t size, int slot);
void showDataStats();
protected:
// One variable-size dense entry: its values together with its dimensions.
struct ProtoVarSlot {
std::vector<real> data;
std::vector<int> dims;
};
// In-memory storage for one slot. It carries one container per possible
// kind of payload.
// NOTE(review): presumably only the container matching `type` is filled --
// confirm in fillSlots().
struct ProtoSlot {
SlotDef::SlotType type;
int dim;
std::vector<int> indexData;
std::vector<real> denseData;
std::vector<sparse_non_value_t> sparseNonValueData;
std::vector<sparse_float_value_t> sparseFloatValueData;
std::vector<int64_t> indices;
std::vector<int64_t> subIndices;
std::vector<ProtoVarSlot> varDenseData;
std::vector<std::vector<int>> varIndices;
std::vector<std::string> strData;
};
// Header that samples are checked against (see checkSample()).
DataHeader header_;
int numVecSlots_;
std::vector<ProtoSlot> slots_;
// Number of samples; the basis of the value returned by getSize().
size_t sampleNums_;
/**
* The starting position of each sequence in samples.
* The last element should be num of samples.
* If empty, each sample is one sequence.
*/
std::vector<size_t> sequenceStartPositions_;
int64_t currentSequenceIndex_;
// The size should be the number of sequences.
std::vector<size_t> shuffledSequenceIds_;
ThreadLocalD<DataBatch> cpuBatch_;
ThreadLocalD<DataBatch> gpuBatch_;
RWLock lock_;
std::vector<StatPtr> nnzStats_; // stats for the number of non-zero entries
};
/**
* @brief Special-purpose provider for proto data whose instances contain
* sparse-non-value slots and a label.
*
* It derives from ProtoDataProvider and declares its own
* getNextBatchInternal(); everything else is inherited.
*
* @note ProtoSequenceDataProvider treats each SPARSE SLOT as a SEQUENCE
*/
class ProtoSequenceDataProvider : public ProtoDataProvider {
public:
/**
* Takes the same arguments as the ProtoDataProvider constructor.
* NOTE(review): presumably they are forwarded to the base class unchanged;
* the definition is not in this header.
*/
ProtoSequenceDataProvider(const DataConfig& config,
bool useGpu,
bool loadDataAll = true);
~ProtoSequenceDataProvider() {}
virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch);
};
} // namespace paddle
......@@ -62,17 +62,6 @@ if(NOT WITH_DOUBLE AND NOT MOBILE_INFERENCE)
endif()
if(NOT MOBILE_INFERENCE)
################### test_ProtoDataProvider ############
add_unittest_without_exec(test_ProtoDataProvider
test_ProtoDataProvider.cpp)
# test_ProtoDataProvider will mkdir as same name,
# so if WORKING_DIRECTORY is default directory, then
# mkdir will get error.
add_test(NAME test_ProtoDataProvider
COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoDataProvider
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
################## test_Evaluator #######################
add_unittest(test_Evaluator
test_Evaluator.cpp)
......@@ -110,3 +99,24 @@ add_test(NAME test_PyDataProvider2
COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/paddle/gserver/tests:${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProvider2
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle
)
################# test_CompareSparse ##################
add_unittest_without_exec(test_CompareSparse
test_CompareSparse.cpp)
if(NOT ON_TRAVIS)
add_test(NAME test_CompareSparse
COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d
${PADDLE_SOURCE_DIR}/python:${PADDLE_SOURCE_DIR}/paddle/gserver/tests
./.set_port.sh -p port -n 6
${CMAKE_CURRENT_BINARY_DIR}/test_CompareSparse
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
endif()
################ test_CompareTwoNets ######################
add_unittest_without_exec(test_CompareTwoNets
test_CompareTwoNets.cpp)
add_test(NAME test_CompareTwoNets
COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d
${PADDLE_SOURCE_DIR}/python:${PADDLE_SOURCE_DIR}/paddle/gserver/tests
${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoNets
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
./test_ProtoDataProvider/data1.bin
./test_ProtoDataProvider/data2.bin
./test_ProtoDataProvider/data1.bin.gz
./test_ProtoDataProvider/data2.bin.gz
#!/usr/bin/env python
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
......@@ -14,27 +15,50 @@
from paddle.trainer_config_helpers import *
################################### Data Configuration ###################################
TrainData(ProtoData(files = "trainer/tests/mnist.list"))
################################### Algorithm Configuration ###################################
settings(batch_size = 1000,
learning_method = MomentumOptimizer(momentum=0.5, sparse=False))
################################### Network Configuration ###################################
data = data_layer(name ="input", size=784)
######################## data source ################################
dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict'
dict_file = dict()
for line_count, line in enumerate(open(dict_path, "r")):
dict_file[line.strip()] = line_count
fc1 = fc_layer(input=data, size=800,
bias_attr=True,
act=SigmoidActivation())
define_py_data_sources2(
train_list='gserver/tests/Sequence/train.list',
test_list=None,
module='sequenceGen',
obj='process',
args={"dict_file": dict_file})
fc2 = fc_layer(input=fc1, size=800,
bias_attr=True,
act=SigmoidActivation())
settings(batch_size=5)
######################## network configure ################################
dict_dim = len(open(dict_path, 'r').readlines())
word_dim = 128
hidden_dim = 256
label_dim = 3
sparse_update = get_config_arg("sparse_update", bool, False)
output = fc_layer(input=[fc1, fc2], size=10,
bias_attr=True,
act=SoftmaxActivation())
data = data_layer(name="word", size=dict_dim)
lbl = data_layer(name ="label", size=1)
emb = embedding_layer(
input=data,
size=word_dim,
param_attr=ParamAttr(sparse_update=sparse_update))
cost = classification_cost(input=output, label=lbl)
outputs(cost)
with mixed_layer(size=hidden_dim * 4) as lstm_input:
lstm_input += full_matrix_projection(input=emb)
lstm = lstmemory(
input=lstm_input,
act=TanhActivation(),
gate_act=SigmoidActivation(),
state_act=TanhActivation())
lstm_last = last_seq(input=lstm)
with mixed_layer(
size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output:
output += full_matrix_projection(input=lstm_last)
outputs(
classification_cost(
input=output, label=data_layer(
name="label", size=1)))
#!/usr/bin/env python
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
......@@ -14,27 +15,42 @@
from paddle.trainer_config_helpers import *
################################### Data Configuration ###################################
TrainData(ProtoData(files = "trainer/tests/mnist.list"))
################################### Algorithm Configuration ###################################
settings(batch_size = 1000,
learning_method = MomentumOptimizer(momentum=0.5, sparse=False))
################################### Network Configuration ###################################
data = data_layer(name ="input", size=784)
######################## data source ################################
dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict'
dict_file = dict()
for line_count, line in enumerate(open(dict_path, "r")):
dict_file[line.strip()] = line_count
fc1 = fc_layer(input=data, size=800,
bias_attr=True,
act=SigmoidActivation())
define_py_data_sources2(
train_list='gserver/tests/Sequence/train.list',
test_list=None,
module='sequenceGen',
obj='process',
args={"dict_file": dict_file})
fc2 = fc_layer(input=fc1, size=800,
bias_attr=True,
act=SigmoidActivation())
settings(batch_size=5)
######################## network configure ################################
dict_dim = len(open(dict_path, 'r').readlines())
word_dim = 128
hidden_dim = 128
label_dim = 3
output = fc_layer(input=[fc1, fc2], size=10,
bias_attr=True,
act=SoftmaxActivation())
# This config is designed to be equivalent with sequence_recurrent_group.py
lbl = data_layer(name ="label", size=1)
data = data_layer(name="word", size=dict_dim)
cost = classification_cost(input=output, label=lbl)
outputs(cost)
emb = embedding_layer(
input=data, size=word_dim, param_attr=ParamAttr(name="emb"))
recurrent = recurrent_layer(input=emb, bias_attr=False, act=SoftmaxActivation())
recurrent_last = last_seq(input=recurrent)
with mixed_layer(
size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output:
output += full_matrix_projection(input=recurrent_last)
outputs(
classification_cost(
input=output, label=data_layer(
name="label", size=1)))
#!/usr/bin/env python
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
######################## data source ################################
dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict'
dict_file = dict()
for line_count, line in enumerate(open(dict_path, "r")):
dict_file[line.strip()] = line_count
define_py_data_sources2(
train_list='gserver/tests/Sequence/train.list',
test_list=None,
module='sequenceGen',
obj='process',
args={"dict_file": dict_file})
settings(batch_size=5)
######################## network configure ################################
dict_dim = len(open(dict_path, 'r').readlines())
word_dim = 128
hidden_dim = 128
label_dim = 3
# This config is designed to be equivalent with sequence_recurrent.py
data = data_layer(name="word", size=dict_dim)
emb = embedding_layer(
input=data, size=word_dim, param_attr=ParamAttr(name="emb"))
def step(y):
    """Build the layers for one time step of the recurrent group.

    The step input ``y`` is passed through an identity projection and summed
    with a full-matrix projection of the memory named "rnn_state"; the
    resulting mixed layer carries that same name, uses a softmax activation
    and has no bias.

    NOTE(review): the fixed parameter name "___recurrent_layer_0__" presumably
    matches the weight that recurrent_layer creates in sequence_recurrent.py,
    so that both configs share the same parameter -- confirm.
    """
    state_name = "rnn_state"
    previous_state = memory(name=state_name, size=hidden_dim)
    with mixed_layer(
            name=state_name,
            size=hidden_dim,
            bias_attr=False,
            act=SoftmaxActivation()) as current_state:
        current_state += identity_projection(input=y)
        current_state += full_matrix_projection(
            input=previous_state,
            param_attr=ParamAttr(name="___recurrent_layer_0__"))
    return current_state
recurrent = recurrent_group(name="rnn", step=step, input=emb)
recurrent_last = last_seq(input=recurrent)
with mixed_layer(
size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output:
output += full_matrix_projection(input=recurrent_last)
outputs(
classification_cost(
input=output, label=data_layer(
name="label", size=1)))
......@@ -22,8 +22,7 @@ limitations under the License. */
using namespace paddle; // NOLINT
using namespace std; // NOLINT
static const string& configFile1 =
"trainer/tests/sample_trainer_config_compare_sparse.conf";
static const string& configFile1 = "gserver/tests/sequence_lstm.conf";
DECLARE_bool(use_gpu);
DECLARE_string(config);
......
......@@ -30,8 +30,6 @@ DECLARE_bool(use_gpu);
DECLARE_string(config);
DECLARE_string(nics);
DEFINE_string(config_file_a, "", "config of one network to compare");
DEFINE_string(config_file_b, "", "config of another network to compare");
DEFINE_bool(need_high_accuracy,
false,
"whether need to run in double accuracy");
......@@ -42,6 +40,10 @@ DEFINE_double(
DECLARE_bool(thread_local_rand_use_global_seed);
DECLARE_int32(seed);
static const string& config_file_a = "gserver/tests/sequence_recurrent.py";
static const string& config_file_b =
"gserver/tests/sequence_recurrent_group.py";
struct ComData {
vector<Argument> outArgs;
vector<ParameterPtr> parameters;
......@@ -66,6 +68,7 @@ void calcGradient(ComData& data, const string configFile) {
DataBatch dataBatch;
int32_t batchSize = trainer.getConfig().opt_config().batch_size();
trainer.getDataProvider()->reset();
trainer.getDataProvider()->setSkipShuffle();
trainer.getDataProvider()->getNextBatch(batchSize, &dataBatch);
......@@ -167,11 +170,11 @@ void compareGradient(ComData& comDataA, ComData& comDataB) {
TEST(Trainer, create) {
ComData dataA;
calcGradient(dataA, FLAGS_config_file_a);
calcGradient(dataA, config_file_a);
LOG(INFO) << "\n\nforwardBackward of Network A is finished\n\n";
ComData dataB;
calcGradient(dataB, FLAGS_config_file_b);
calcGradient(dataB, config_file_b);
LOG(INFO) << "\n\nforwardBackward of the Network B is finished\n\n";
compareGradient(dataA, dataB);
......
......@@ -28,35 +28,7 @@ if(WITH_PYTHON)
${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port ${CMAKE_CURRENT_BINARY_DIR}/test_TrainerOnePass
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
endif()
################ test_CompareTwoNets ######################
add_unittest_without_exec(test_CompareTwoNets
test_CompareTwoNets.cpp)
add_test(NAME test_CompareTwoNets
COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoNets
--config_file_a=trainer/tests/sample_trainer_config_qb_rnn.conf --config_file_b=trainer/tests/sample_trainer_config_rnn.conf
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
############### test_CompareTwoOpts ###################
add_unittest_without_exec(test_CompareTwoOpts
test_CompareTwoOpts.cpp)
add_test(NAME test_CompareTwoOpts
COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoOpts
--config_file_a=trainer/tests/sample_trainer_config_opt_a.conf --config_file_b=trainer/tests/sample_trainer_config_opt_b.conf
--num_passes=1 --need_high_accuracy=0
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
################# test_CompareSparse ##################
add_unittest_without_exec(test_CompareSparse
test_CompareSparse.cpp)
if(NOT ON_TRAVIS)
add_test(NAME test_CompareSparse
COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
./.set_port.sh -p port -n 6
${CMAKE_CURRENT_BINARY_DIR}/test_CompareSparse
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
endif()
################# test_recurrent_machine_generation ###############
add_unittest_without_exec(test_recurrent_machine_generation
test_recurrent_machine_generation.cpp)
......
./trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.proto_data
#edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# TODO(luotao02): This config is only used for unit tests. It is out of date now and will be updated later.
# Note: when making changes to this file, please make sure
# sample_trainer_config_rnn.conf is changed accordingly so that the unit test
# for comparing these two nets can pass (test_CompareTwoNets)
default_initial_std(0.1)
default_device(0)
word_dim = 999
l1 = 0
l2 = 0
model_type("nn")
sparse_update = get_config_arg("sparse_update", bool, False)
TrainData(ProtoData(
type = "proto_sequence",
files = ('trainer/tests/train_sparse.list'),
))
Settings(
algorithm='sgd',
batch_size=100,
learning_rate=0.0001,
learning_rate_decay_a=4e-08,
learning_rate_decay_b=0.0,
learning_rate_schedule='poly',
)
wordvec_dim = 32
layer2_dim = 16
layer3_dim = 16
hidden_dim = 32
slot_names = ["qb", "qw", "tb", "tw"]
def ltr_network(network_name,
                word_dim=word_dim,
                wordvec_dim=wordvec_dim,
                layer2_dim=layer2_dim,
                layer3_dim=layer3_dim,
                hidden_dim=hidden_dim,
                slot_names=slot_names,
                l1=l1,
                l2=l2):
    """Declare one branch of the ranking network.

    Every layer name embeds ``network_name``; parameter names do not, so a
    second call with another ``network_name`` refers to the same parameter
    names (presumably sharing the weights between branches).

    For each slot the chain is: data layer -> table-projection embedding ->
    recurrent layer -> last instance of the sequence. The per-slot results
    feed the fully connected layers "layer2_", "layer3_" and "output_".

    ``hidden_dim`` is accepted but not used by this variant.
    """
    # All slots are registered as inputs before any layer is declared.
    for slot in slot_names:
        Inputs(slot + network_name)

    for slot in slot_names:
        data_name = slot + network_name
        embedding_name = slot + "_embedding_" + network_name
        rnn_name = slot + "_rnn1_" + network_name

        Layer(name=data_name, type="data", size=word_dim, device=-1)
        Layer(
            name=embedding_name,
            type="mixed",
            size=wordvec_dim,
            bias=False,
            device=-1,
            inputs=TableProjection(
                data_name,
                parameter_name="embedding.w0",
                decay_rate_l1=l1,
                sparse_remote_update=True,
                sparse_update=sparse_update))
        Layer(
            name=rnn_name,
            type="recurrent",
            active_type="tanh",
            bias=Bias(initial_std=0, parameter_name="rnn1.bias"),
            inputs=Input(embedding_name, parameter_name="rnn1.w0"))
        Layer(
            name=slot + "_rnnlast_" + network_name,
            type="seqlastins",
            inputs=[rnn_name])

    layer2_name = "layer2_" + network_name
    layer3_name = "layer3_" + network_name

    # layer2 takes the last-instance output of every slot, each with its own
    # weight named after the slot.
    Layer(
        name=layer2_name,
        type="fc",
        active_type="tanh",
        size=layer2_dim,
        bias=Bias(parameter_name="layer2.bias"),
        inputs=[
            Input(
                slot_name + "_rnnlast_" + network_name,
                parameter_name="_layer2_" + slot_name + ".w",
                decay_rate=l2,
                initial_smart=True) for slot_name in slot_names
        ])
    Layer(
        name=layer3_name,
        type="fc",
        active_type="tanh",
        size=layer3_dim,
        bias=Bias(parameter_name="layer3.bias"),
        inputs=[
            Input(
                layer2_name,
                parameter_name="_layer3.w",
                decay_rate=l2,
                initial_smart=True),
        ])
    Layer(
        name="output_" + network_name,
        type="fc",
        size=1,
        bias=False,
        inputs=[Input(layer3_name, parameter_name="_layerO.w")])
ltr_network("left")
ltr_network("right")
Inputs("label")
Layer(
name = "label",
type = "data",
size = 1,
)
Outputs("cost", "qb_rnnlast_left")
Layer(
name = "cost",
type = "rank-cost",
inputs = ["output_left", "output_right", "label"],
)
#edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# TODO(luotao02): This config is only used for unit tests. It is out of date now and will be updated later.
# Note: when making changes to this file, please make sure
# sample_trainer_config_rnn.conf is changed accordingly so that the unit test
# for comparing these two nets can pass (test_CompareTwoNets)
default_initial_std(0.1)
default_device(0)
word_dim = 1451594
l1 = 0
l2 = 0
model_type("nn")
sparse_update = get_config_arg("sparse_update", bool, False)
TrainData(ProtoData(
type = "proto_sequence",
files = ('trainer/tests/train.list'),
))
Settings(
algorithm='sgd',
batch_size=100,
learning_rate=0.0001,
learning_rate_decay_a=4e-08,
learning_rate_decay_b=0.0,
learning_rate_schedule='poly',
)
wordvec_dim = 128
layer2_dim = 96
layer3_dim = 96
hidden_dim = 128
slot_names = ["qb", "qw", "tb", "tw"]
def ltr_network(network_name,
                word_dim=word_dim,
                wordvec_dim=wordvec_dim,
                layer2_dim=layer2_dim,
                layer3_dim=layer3_dim,
                hidden_dim=hidden_dim,
                slot_names=slot_names,
                l1=l1,
                l2=l2):
    """Declare one branch of the ranking network.

    Every layer name embeds ``network_name``; parameter names do not, so a
    second call with another ``network_name`` refers to the same parameter
    names (presumably sharing the weights between branches).

    For each slot the chain is: data layer -> table-projection embedding ->
    recurrent layer -> last instance of the sequence. The per-slot results
    feed the fully connected layers "layer2_", "layer3_" and "output_".

    ``hidden_dim`` is accepted but not used by this variant.
    """
    # All slots are registered as inputs before any layer is declared.
    for slot in slot_names:
        Inputs(slot + network_name)

    for slot in slot_names:
        data_name = slot + network_name
        embedding_name = slot + "_embedding_" + network_name
        rnn_name = slot + "_rnn1_" + network_name

        Layer(name=data_name, type="data", size=word_dim, device=-1)
        Layer(
            name=embedding_name,
            type="mixed",
            size=wordvec_dim,
            bias=False,
            device=-1,
            inputs=TableProjection(
                data_name,
                parameter_name="embedding.w0",
                decay_rate_l1=l1,
                sparse_remote_update=True,
                sparse_update=sparse_update))
        Layer(
            name=rnn_name,
            type="recurrent",
            active_type="tanh",
            bias=Bias(initial_std=0, parameter_name="rnn1.bias"),
            inputs=Input(embedding_name, parameter_name="rnn1.w0"))
        Layer(
            name=slot + "_rnnlast_" + network_name,
            type="seqlastins",
            inputs=[rnn_name])

    layer2_name = "layer2_" + network_name
    layer3_name = "layer3_" + network_name

    # layer2 takes the last-instance output of every slot, each with its own
    # weight named after the slot.
    Layer(
        name=layer2_name,
        type="fc",
        active_type="tanh",
        size=layer2_dim,
        bias=Bias(parameter_name="layer2.bias"),
        inputs=[
            Input(
                slot_name + "_rnnlast_" + network_name,
                parameter_name="_layer2_" + slot_name + ".w",
                decay_rate=l2,
                initial_smart=True) for slot_name in slot_names
        ])
    Layer(
        name=layer3_name,
        type="fc",
        active_type="tanh",
        size=layer3_dim,
        bias=Bias(parameter_name="layer3.bias"),
        inputs=[
            Input(
                layer2_name,
                parameter_name="_layer3.w",
                decay_rate=l2,
                initial_smart=True),
        ])
    Layer(
        name="output_" + network_name,
        type="fc",
        size=1,
        bias=False,
        inputs=[Input(layer3_name, parameter_name="_layerO.w")])
ltr_network("left")
ltr_network("right")
Inputs("label")
Layer(
name = "label",
type = "data",
size = 1,
)
Outputs("cost", "qb_rnnlast_left")
Layer(
name = "cost",
type = "rank-cost",
inputs = ["output_left", "output_right", "label"],
)
#edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# TODO(luotao02): This config is only used for unit tests. It is out of date now and will be updated later.
# Note: when making changes to this file, please make sure
# sample_trainer_config_qb_rnn.conf is changed accordingly so that the unit test
# for comparing these two nets can pass (test_CompareTwoNets)
default_initial_std(0.1)
default_device(0)
word_dim = 1451594
l1 = 0
l2 = 0
model_type("recurrent_nn")
sparse_update = get_config_arg("sparse_update", bool, False)
TrainData(ProtoData(
type = "proto_sequence",
files = ('trainer/tests/train.list'),
))
Settings(
algorithm='sgd',
batch_size=100,
learning_rate=0.0001,
learning_rate_decay_a=4e-08,
learning_rate_decay_b=0.0,
learning_rate_schedule='poly',
)
wordvec_dim = 128
layer2_dim = 96
layer3_dim = 96
hidden_dim = 128
slot_names = ["qb", "qw", "tb", "tw"]
def SimpleRecurrentLayer(name,
                         size,
                         active_type,
                         bias,
                         input_layer_name,
                         parameter_name,
                         seq_reversed = False):
    """Declare a simple recurrent layer as an explicit recurrent layer group.

    The group is named ``name + "_layer_group"``, reads ``input_layer_name``
    and exposes ``name`` as its out-link. Inside the group a mixed layer
    called ``name`` sums an identity projection of the input with a
    full-matrix projection (weight ``parameter_name``) of the memory that is
    declared under the same ``name``.
    """
    group_name = name + "_layer_group"
    RecurrentLayerGroupBegin(
        group_name,
        in_links=[input_layer_name],
        out_links=[name],
        seq_reversed=seq_reversed)

    memory_name = Memory(name=name, size=size)
    Layer(
        name=name,
        type="mixed",
        size=size,
        active_type=active_type,
        bias=bias,
        inputs=[
            IdentityProjection(input_layer_name),
            FullMatrixProjection(memory_name, parameter_name=parameter_name),
        ])

    RecurrentLayerGroupEnd(group_name)
def ltr_network(network_name,
                word_dim=word_dim,
                wordvec_dim=wordvec_dim,
                layer2_dim=layer2_dim,
                layer3_dim=layer3_dim,
                hidden_dim=hidden_dim,
                slot_names=slot_names,
                l1=l1,
                l2=l2):
    """Declare one branch of the ranking network using recurrent layer groups.

    Every layer name embeds ``network_name``; parameter names do not, so a
    second call with another ``network_name`` refers to the same parameter
    names (presumably sharing the weights between branches).

    For each slot the chain is: data layer -> table-projection embedding ->
    SimpleRecurrentLayer of size ``hidden_dim`` -> last instance of the
    sequence. The per-slot results feed the fully connected layers
    "layer2_", "layer3_" and "output_".
    """
    # All slots are registered as inputs before any layer is declared.
    for slot in slot_names:
        Inputs(slot + network_name)

    for slot in slot_names:
        data_name = slot + network_name
        embedding_name = slot + "_embedding_" + network_name
        rnn_name = slot + "_rnn1_" + network_name

        Layer(name=data_name, type="data", size=word_dim, device=-1)
        Layer(
            name=embedding_name,
            type="mixed",
            size=wordvec_dim,
            bias=False,
            device=-1,
            inputs=TableProjection(
                data_name,
                parameter_name="embedding.w0",
                decay_rate_l1=l1,
                sparse_remote_update=True,
                sparse_update=sparse_update))
        SimpleRecurrentLayer(
            name=rnn_name,
            size=hidden_dim,
            active_type="tanh",
            bias=Bias(initial_std=0, parameter_name="rnn1.bias"),
            input_layer_name=embedding_name,
            parameter_name="rnn1.w0")
        Layer(
            name=slot + "_rnnlast_" + network_name,
            type="seqlastins",
            inputs=[rnn_name])

    layer2_name = "layer2_" + network_name
    layer3_name = "layer3_" + network_name

    # layer2 takes the last-instance output of every slot, each with its own
    # weight named after the slot.
    Layer(
        name=layer2_name,
        type="fc",
        active_type="tanh",
        size=layer2_dim,
        bias=Bias(parameter_name="layer2.bias"),
        inputs=[
            Input(
                slot_name + "_rnnlast_" + network_name,
                parameter_name="_layer2_" + slot_name + ".w",
                decay_rate=l2,
                initial_smart=True) for slot_name in slot_names
        ])
    Layer(
        name=layer3_name,
        type="fc",
        active_type="tanh",
        size=layer3_dim,
        bias=Bias(parameter_name="layer3.bias"),
        inputs=[
            Input(
                layer2_name,
                parameter_name="_layer3.w",
                decay_rate=l2,
                initial_smart=True),
        ])
    Layer(
        name="output_" + network_name,
        type="fc",
        size=1,
        bias=False,
        inputs=[Input(layer3_name, parameter_name="_layerO.w")])
ltr_network("left")
ltr_network("right")
Inputs("label")
Layer(
name = "label",
type = "data",
size = 1,
)
Outputs("cost", "qb_rnnlast_left")
Layer(
name = "cost",
type = "rank-cost",
inputs = ["output_left", "output_right", "label"],
)
......@@ -20,28 +20,6 @@ import random
import json
import string
@provider(slots=[
    SparseNonValueSlot(10), DenseSlot(2), SparseValueSlot(10), StringSlot(1),
    IndexSlot(3)
])
def processNonSequenceData(obj, filename):
    """Yield one sample per line of ``filename``.

    Each line holds five ';'-separated fields:
      0: an integer index
      1: whitespace-separated integers (first token skipped)
      2: whitespace-separated floats (first token skipped)
      3: whitespace-separated "id:value" pairs (first token skipped)
      4: a string; everything after the first space is kept

    The yielded list follows the order of the declared slots:
    [non_values, dense, values, strs, index].
    """

    def parse_pair(token):
        # "id:value" -> (int id, float value)
        parts = token.split(":")
        return int(parts[0]), float(parts[1])

    with open(filename, "rb") as data_file:
        for record in data_file:
            fields = record.split(';')
            index = int(fields[0])
            non_values = [int(tok) for tok in fields[1].split()[1:]]
            dense = [float(tok) for tok in fields[2].split()[1:]]
            strs = fields[4].strip().split(' ', 1)[1]
            values = [parse_pair(tok) for tok in fields[3].split()[1:]]
            yield [non_values, dense, values, strs, index]
SPARSE_ID_LIMIT = 1000
SPARSE_ID_COUNT = 100
SEQUENCE_LIMIT = 50
......@@ -146,8 +124,6 @@ def processSubSeqAndGenerateData(obj, name):
if __name__ == "__main__":
pvd = processNonSequenceData("test.txt")
print pvd.getNextBatch(100)
pvd = processSeqAndGenerateData("_")
print pvd.getNextBatch(100)
pvd = processSubSeqAndGenerateData("_")
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <paddle/utils/PythonUtil.h>
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include "paddle/trainer/Trainer.h"
using namespace paddle; // NOLINT
using namespace std; // NOLINT
DECLARE_int32(gpu_id);
DECLARE_bool(local);
DECLARE_bool(use_gpu);
DECLARE_string(config);
DECLARE_string(nics);
DEFINE_string(config_file_a, "", "config of one network to compare");
DEFINE_string(config_file_b, "", "config of another network to compare");
DEFINE_bool(need_high_accuracy,
true,
"whether need to run in double accuracy (recommended)");
DEFINE_double(
max_diff_ratio,
0.0f,
"max diff ratio allowed for outputs and parameters (value/gradient)");
// Data captured from one network run so that two runs can be compared by
// compareGradient().
struct ComData {
// Output arguments of the network. calcGradient() in this file does not
// assign this field, so it stays empty unless filled elsewhere.
vector<Argument> outArgs;
// Parameters of the gradient machine, assigned by calcGradient().
vector<ParameterPtr> parameters;
};
// Trains the network described by `configFile` and stores its parameters in
// `data.parameters`. `data.outArgs` is not touched here.
//
// The function works through process-wide flags: it overwrites FLAGS_config,
// FLAGS_local, FLAGS_use_gpu and FLAGS_nics and leaves them changed on return.
void calcGradient(ComData& data, const string configFile) {
FLAGS_config = configFile;
FLAGS_local = true;
FLAGS_use_gpu = false;
FLAGS_nics = "";
// Both random sources are reset to the fixed seed 0 before the trainer is
// created (presumably so that separate runs are reproducible and therefore
// comparable).
*ThreadLocalRand::getSeed() = 0;
srand(0);
Trainer trainer;
trainer.init(TrainerConfigHelper::createFromFlagConfig(), false);
// The parameters are captured before train() is called.
// NOTE(review): ParameterPtr is presumably a shared pointer, in which case
// the captured entries reflect the state after training -- confirm.
data.parameters = trainer.getGradientMachine()->getParameters();
// Shuffling is switched off on the data provider before training starts.
trainer.getDataProvider()->setSkipShuffle();
trainer.train();
}
/**
 * @brief Compare two buffers element by element and expect no mismatch.
 *
 * An element counts as a mismatch when
 *  - the absolute difference is not finite (a NaN or an infinity is involved
 *    on either side), or
 *  - the difference is non-zero and, relative to the larger magnitude of the
 *    two values, exceeds FLAGS_max_diff_ratio.
 *
 * Every mismatch is logged with its row (i / width); the total number of
 * mismatches is checked with EXPECT_EQ, so the test continues on failure.
 *
 * @param A     first buffer, at least len elements
 * @param desA  label printed for values of A
 * @param B     second buffer, at least len elements
 * @param desB  label printed for values of B
 * @param len   number of elements to compare
 * @param width row width used only to compute the row shown in the log
 */
void checkBuffer(real* A,
                 const char* desA,
                 real* B,
                 const char* desB,
                 size_t len,
                 size_t width = 1) {
  int nNum = 0;
  for (size_t i = 0; i < len; ++i) {
    const real diff = fabs(A[i] - B[i]);
    // Ordered comparisons against NaN are always false and inf / inf is NaN,
    // so non-finite values would slip through the ratio test below. They are
    // reported explicitly instead of being treated as equal.
    const bool notFinite = !std::isfinite(diff);
    const bool exceedsRatio =
        diff > 0.0f &&
        diff / std::max(fabs(A[i]), fabs(B[i])) > FLAGS_max_diff_ratio;
    if (notFinite || exceedsRatio) {
      nNum++;
      LOG(INFO) << "Row: " << i / width << ", " << desA << " : " << A[i]
                << " " << desB << " : " << B[i];
    }
  }
  EXPECT_EQ(0, nNum);
  LOG(INFO) << "\n\n";
}
// Compares the results of two training runs.
//
// First every pair of network outputs is copied into CPU matrices and
// compared with checkBuffer(); then, for every pair of parameters, both the
// PARAMETER_VALUE and the PARAMETER_GRADIENT buffers are compared the same
// way. Mismatching elements are reported by checkBuffer() as non-fatal gtest
// failures.
//
// The two runs are indexed in lockstep, so differing counts or shapes would
// read past the end of network B's data. Those structural mismatches are
// therefore reported as fatal gtest failures (ASSERT_*), which make this
// function return early instead of comparing unrelated memory.
void compareGradient(ComData& comDataA, ComData& comDataB) {
  // Bind by reference: nothing here modifies the vectors, so copying them
  // is unnecessary.
  const vector<Argument>& outArgsA = comDataA.outArgs;
  const vector<Argument>& outArgsB = comDataB.outArgs;
  ASSERT_EQ(outArgsA.size(), outArgsB.size());

  for (size_t i = 0; i < outArgsA.size(); ++i) {
    const auto& valueA = outArgsA[i].value;
    const auto& valueB = outArgsB[i].value;
    ASSERT_TRUE(valueA != nullptr);
    ASSERT_TRUE(valueB != nullptr);
    // checkBuffer() walks both buffers using A's element count, so the
    // shapes have to agree.
    ASSERT_EQ(valueA->getHeight(), valueB->getHeight());
    ASSERT_EQ(valueA->getWidth(), valueB->getWidth());

    CpuMatrix matA(valueA->getHeight(), valueA->getWidth());
    CpuMatrix matB(valueB->getHeight(), valueB->getWidth());
    matA.copyFrom(*valueA);
    matB.copyFrom(*valueB);

    LOG(INFO) << "\n--------------------------------"
              << " Check Network Output_" << i << ":"
              << " -------------------------------------\n";
    checkBuffer(matA.getData(),
                "network A output",
                matB.getData(),
                "network B output",
                matA.getElementCnt(),
                matA.getWidth());
  }

  const vector<ParameterPtr>& parametersA = comDataA.parameters;
  const vector<ParameterPtr>& parametersB = comDataB.parameters;
  ASSERT_EQ(parametersA.size(), parametersB.size());

  LOG(INFO) << "\n\n--------------------------------"
            << " Check Gradient Machine Parameters:"
            << " -------------------------------------\n";
  for (size_t i = 0; i < parametersA.size(); ++i) {
    const ParameterPtr& parameterA = parametersA[i];
    const ParameterPtr& parameterB = parametersB[i];
    ASSERT_EQ(parameterA->getSize(), parameterB->getSize());

    // Parameter values, copied into CPU vectors before comparison.
    CpuVector paraA(parameterA->getSize());
    CpuVector paraB(parameterB->getSize());
    paraA.copyFrom(*parameterA->getBuf(PARAMETER_VALUE));
    paraB.copyFrom(*parameterB->getBuf(PARAMETER_VALUE));

    LOG(INFO) << "\n\n----------- PARAMETER_VALUE: " << parameterA->getName()
              << " ; size : " << paraA.getSize() << " ------------";
    checkBuffer(paraA.getData(),
                "Network A",
                paraB.getData(),
                "Network B",
                paraA.getSize());

    // Parameter gradients.
    CpuVector gradA(*parameterA->getBuf(PARAMETER_GRADIENT));
    CpuVector gradB(*parameterB->getBuf(PARAMETER_GRADIENT));
    ASSERT_EQ(gradA.getSize(), gradB.getSize());

    LOG(INFO) << "\n\n----------- PARAMETER_GRADIENT: " << parameterA->getName()
              << " ; size : " << gradA.getSize() << " -----------";
    checkBuffer(gradA.getData(),
                "Network A",
                gradB.getData(),
                "Network B",
                gradA.getSize());
  }
}
// Trains the configurations given by --config_file_a and --config_file_b one
// after the other and compares what the two runs produced.
TEST(Trainer, create) {
  ComData dataA;
  ComData dataB;

  calcGradient(dataA, FLAGS_config_file_a);
  LOG(INFO) << "\n\ntraining of Network A is finished\n\n";

  calcGradient(dataB, FLAGS_config_file_b);
  LOG(INFO) << "\n\ntraining of the Network B is finished\n\n";

  compareGradient(dataA, dataB);
}
// Test entry point: initializes Paddle, gtest and Python, chooses a default
// for --max_diff_ratio when the flag was left at 0, then runs all tests.
//
// In a build without PADDLE_TYPE_DOUBLE the tests are skipped entirely
// (exit code 0) while --need_high_accuracy is true.
int main(int argc, char** argv) {
  paddle::initMain(argc, argv);
  testing::InitGoogleTest(&argc, argv);
  initPython(argc, argv);

  // The preprocessor only selects the tolerance and the log suffix; the
  // "apply default" logic below is shared by both build modes.
#ifdef PADDLE_TYPE_DOUBLE
  const double defaultMaxDiffRatio = 2e-7;
  const char* const modeSuffix = " in high accuracy mode";
#else
  if (FLAGS_need_high_accuracy) {
    LOG(INFO) << "skip test due to it's need high accuracy";
    return 0;
  }
  const double defaultMaxDiffRatio = 2e-4;
  const char* const modeSuffix = " in low accuracy mode";
#endif

  if (FLAGS_max_diff_ratio == 0.0f) {
    FLAGS_max_diff_ratio = defaultMaxDiffRatio;
    LOG(INFO) << "auto set max_diff_ratio " << FLAGS_max_diff_ratio
              << modeSuffix;
  }

  return RUN_ALL_TESTS();
}
......@@ -25,45 +25,9 @@ limitations under the License. */
#include <unordered_set>
#include "picojson.h"
// Compares two arguments (matrix value, ids and strings) with gtest checks;
// defined further down in this file.
void checkEqual(const paddle::Argument& expect, const paddle::Argument& actual);
// Checks `arguments` against the JSON array `arr`; defined further down in
// this file. NOTE(review): presumably `arr` holds the expected slot values --
// confirm against the definition.
void checkValue(std::vector<paddle::Argument>& arguments, picojson::array& arr);
// Directory prefix for the file lists that the tests below pass to
// DataConfig::set_files().
const std::string kDir = "./trainer/tests/pydata_provider_wrapper_dir/";
// Requests one batch (size 100) from a "py" data provider and one from a
// "proto" data provider, both with shuffling skipped, and checks with
// checkEqual() that every stream of the two batches matches.
TEST(PyDataProviderWrapper, NoSequenceData) {
  // Batch produced by the Python data provider.
  paddle::DataConfig conf;
  conf.set_type("py");
  conf.set_load_data_module(std::string("testPyDataWrapper"));
  conf.set_load_data_object(std::string("processNonSequenceData"));
  conf.set_async_load_data(false);
  conf.clear_files();
  conf.set_files(kDir + "test_pydata_provider_wrapper.list");
  paddle::DataProviderPtr provider(paddle::DataProvider::create(conf, false));
  provider->setSkipShuffle();
  provider->reset();
  paddle::DataBatch batchFromPy;
  provider->getNextBatch(100, &batchFromPy);

  // Reference batch produced by the proto data provider. `provider` is
  // re-pointed here, which releases the Python provider created above.
  paddle::DataConfig conf2;
  conf2.set_type("proto");
  conf2.set_async_load_data(false);
  conf2.clear_files();
  conf2.set_files(kDir + "test_pydata_provider_wrapper.protolist");
  provider.reset(paddle::DataProvider::create(conf2, false));
  provider->setSkipShuffle();
  provider->reset();
  paddle::DataBatch batchFromProto;
  provider->getNextBatch(100, &batchFromProto);

  std::vector<paddle::Argument>& pyArguments = batchFromPy.getStreams();
  std::vector<paddle::Argument>& protoArguments = batchFromProto.getStreams();
  // Fatal on purpose: the loop below indexes protoArguments with indices
  // taken from pyArguments, so it must not run when the counts differ.
  ASSERT_EQ(pyArguments.size(), protoArguments.size());
  for (size_t i = 0; i < pyArguments.size(); ++i) {
    checkEqual(protoArguments[i], pyArguments[i]);
  }
}
TEST(PyDataProviderWrapper, SequenceData) {
paddle::DataConfig conf;
conf.set_type("py");
......@@ -148,66 +112,6 @@ int main(int argc, char** argv) {
return RUN_ALL_TESTS();
}
// Verifies with gtest checks that `actual` carries the same data as `expect`.
//
// Only the members that are set in `expect` are compared:
//   - value: width and height, then either the sparse layout (format, element
//     count, row and column index arrays, and the values when the value type
//     is FLOAT_VALUE) or, for a CpuMatrix, the dense element data;
//   - ids:  size and every element;
//   - strs: size and every string.
//
// Element comparisons are non-fatal (EXPECT_*), so all differing elements get
// reported. Every check that guards a later dereference or array index (null
// pointers, failed casts, differing sizes or formats) is fatal (ASSERT_*) and
// makes the function return immediately, because continuing would read
// through a null pointer or past the end of `actual`'s data.
void checkEqual(const paddle::Argument& expect,
                const paddle::Argument& actual) {
  if (expect.value) {
    ASSERT_TRUE(actual.value != nullptr);
    paddle::Matrix* expectMat = expect.value.get();
    paddle::Matrix* actualMat = actual.value.get();
    ASSERT_EQ(expectMat->getWidth(), actualMat->getWidth());
    ASSERT_EQ(expectMat->getHeight(), actualMat->getHeight());

    if (auto* se = dynamic_cast<paddle::CpuSparseMatrix*>(expectMat)) {
      // `actual` has to be sparse as well, otherwise the cast yields null.
      auto* sa = dynamic_cast<paddle::CpuSparseMatrix*>(actualMat);
      ASSERT_TRUE(sa != nullptr);
      ASSERT_EQ(se->getFormat(), sa->getFormat());
      ASSERT_EQ(se->getElementCnt(), sa->getElementCnt());

      // Lengths of the row/column index arrays depend on the sparse format.
      const bool isCsc = se->getFormat() == paddle::SPARSE_CSC;
      const size_t rowSize =
          isCsc ? se->getElementCnt() : se->getHeight() + 1;
      const size_t colSize =
          isCsc ? se->getWidth() + 1 : se->getElementCnt();
      for (size_t i = 0; i < rowSize; ++i) {
        EXPECT_EQ(se->getRows()[i], sa->getRows()[i]);
      }
      for (size_t i = 0; i < colSize; ++i) {
        EXPECT_EQ(se->getCols()[i], sa->getCols()[i]);
      }

      if (se->getValueType() == paddle::FLOAT_VALUE) {
        // Fatal: the loop below reads sa->getValue().
        ASSERT_EQ(paddle::FLOAT_VALUE, sa->getValueType());
        for (size_t i = 0; i < se->getElementCnt(); ++i) {
          EXPECT_EQ(se->getValue()[i], sa->getValue()[i]);
        }
      }
    } else if (dynamic_cast<paddle::CpuMatrix*>(expectMat)) {
      ASSERT_EQ(expectMat->getElementCnt(), actualMat->getElementCnt());
      for (size_t i = 0; i < expectMat->getElementCnt(); ++i) {
        EXPECT_EQ(expectMat->getData()[i], actualMat->getData()[i]);
      }
    }
  }

  if (expect.ids) {
    ASSERT_TRUE(actual.ids != nullptr);
    paddle::VectorT<int>* expectIds = expect.ids.get();
    paddle::VectorT<int>* actualIds = actual.ids.get();
    ASSERT_EQ(expectIds->getSize(), actualIds->getSize());
    for (size_t i = 0; i < expectIds->getSize(); ++i) {
      EXPECT_EQ(expectIds->getData()[i], actualIds->getData()[i]);
    }
  }

  if (expect.strs) {
    ASSERT_TRUE(actual.strs != nullptr);
    std::vector<std::string>* expectStrs = expect.strs.get();
    std::vector<std::string>* actualStrs = actual.strs.get();
    ASSERT_EQ(expectStrs->size(), actualStrs->size());
    for (size_t i = 0; i < expectStrs->size(); ++i) {
      EXPECT_EQ((*expectStrs)[i], (*actualStrs)[i]);
    }
  }
}
void checkValue(std::vector<paddle::Argument>& arguments,
picojson::array& arr) {
// CHECK SLOT 0, Sparse Value.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册