Commit 5905d0e8 authored by Yu Yang

Merge branch 'develop' of github.com:baidu/Paddle into feature/better_infer_interface

@@ -13,9 +13,10 @@
# limitations under the License
import sys
import paddle.v2 as paddle
from api_v2_vgg import vgg_bn_drop
+from api_v2_resnet import resnet_cifar10

def main():
@@ -23,16 +24,16 @@ def main():
    classdim = 10

    # PaddlePaddle init
-   paddle.init(use_gpu=True, trainer_count=1)
+   paddle.init(use_gpu=False, trainer_count=1)

    image = paddle.layer.data(
        name="image", type=paddle.data_type.dense_vector(datadim))

    # Add neural network config
    # option 1. resnet
-   net = resnet_cifar10(image, depth=32)
+   # net = resnet_cifar10(image, depth=32)
    # option 2. vgg
-   # net = vgg_bn_drop(image)
+   net = vgg_bn_drop(image)

    out = paddle.layer.fc(input=net,
                          size=classdim,
@@ -68,7 +69,7 @@ def main():
            result = trainer.test(
                reader=paddle.batch(
                    paddle.dataset.cifar.test10(), batch_size=128),
-               reader_dict={'image': 0,
+               feeding={'image': 0,
                             'label': 1})
            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
@@ -83,7 +84,7 @@ def main():
            batch_size=128),
        num_passes=5,
        event_handler=event_handler,
-       reader_dict={'image': 0,
+       feeding={'image': 0,
                     'label': 1})
......
@@ -30,25 +30,25 @@ def main():
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
-               print "Pass %d, Batch %d, Cost %f, %s" % (
-                   event.pass_id, event.batch_id, event.cost, event.metrics)
+               print "Pass %d, Batch %d, Cost %f" % (
+                   event.pass_id, event.batch_id, event.cost)
        if isinstance(event, paddle.event.EndPass):
+           if (event.pass_id + 1) % 10 == 0:
                result = trainer.test(
-                   reader=paddle.reader.batched(
+                   reader=paddle.batch(
                        uci_housing.test(), batch_size=2),
-                   reader_dict={'x': 0,
+                   feeding={'x': 0,
                                 'y': 1})
-           if event.pass_id % 10 == 0:
-               print "Test %d, %s" % (event.pass_id, result.metrics)
+               print "Test %d, %.2f" % (event.pass_id, result.cost)

    # training
    trainer.train(
-       reader=paddle.reader.batched(
+       reader=paddle.batch(
            paddle.reader.shuffle(
                uci_housing.train(), buf_size=500),
            batch_size=2),
-       reader_dict={'x': 0,
+       feeding={'x': 0,
                     'y': 1},
        event_handler=event_handler,
        num_passes=30)
......
@@ -5,3 +5,6 @@ plot.png
train.log
*pyc
.ipynb_checkpoints
+params.pkl
+params.tar
+params.tar.gz
import paddle.v2 as paddle
+import gzip

def softmax_regression(img):
@@ -71,6 +72,10 @@ def main():
    cost = paddle.layer.classification_cost(input=predict, label=label)

+   try:
+       with gzip.open('params.tar.gz', 'r') as f:
+           parameters = paddle.parameters.Parameters.from_tar(f)
+   except IOError:
        parameters = paddle.parameters.create(cost)

    optimizer = paddle.optimizer.Momentum(
@@ -86,10 +91,18 @@ def main():
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
-           if event.batch_id % 100 == 0:
-               print "Pass %d, Batch %d, Cost %f, %s" % (
-                   event.pass_id, event.batch_id, event.cost, event.metrics)
-       if isinstance(event, paddle.event.EndPass):
+           if event.batch_id % 1000 == 0:
+               result = trainer.test(reader=paddle.batch(
+                   paddle.dataset.mnist.test(), batch_size=256))
+               print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
+                   event.pass_id, event.batch_id, event.cost, event.metrics,
+                   result.metrics)
+               with gzip.open('params.tar.gz', 'w') as f:
+                   parameters.to_tar(f)
+       elif isinstance(event, paddle.event.EndPass):
            result = trainer.test(reader=paddle.batch(
                paddle.dataset.mnist.test(), batch_size=128))
            print "Test with Pass %d, Cost %f, %s\n" % (
......
@@ -163,11 +163,11 @@ def main():
        update_equation=optimizer)

    parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))

-   trn_reader = paddle.reader.batched(
+   trn_reader = paddle.batch(
        paddle.reader.shuffle(
            conll05.test(), buf_size=8192), batch_size=10)

-   reader_dict = {
+   feeding = {
        'word_data': 0,
        'ctx_n2_data': 1,
        'ctx_n1_data': 2,
@@ -183,7 +183,7 @@ def main():
        reader=trn_reader,
        event_handler=event_handler,
        num_passes=10000,
-       reader_dict=reader_dict)
+       feeding=feeding)

if __name__ == '__main__':
......
@@ -18,11 +18,7 @@ from paddle.trainer_config_helpers.poolings import MaxPooling
import paddle.v2 as paddle

-def convolution_net(input_dim,
-                    class_dim=2,
-                    emb_dim=128,
-                    hid_dim=128,
-                    is_predict=False):
+def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128):
    data = paddle.layer.data("word",
                             paddle.data_type.integer_value_sequence(input_dim))
    emb = paddle.layer.embedding(input=data, size=emb_dim)
@@ -42,8 +38,7 @@ def stacked_lstm_net(input_dim,
                     class_dim=2,
                     emb_dim=128,
                     hid_dim=512,
-                    stacked_num=3,
-                    is_predict=False):
+                    stacked_num=3):
    """
    A Wrapper for sentiment classification task.
    This network uses bi-directional recurrent network,
@@ -110,7 +105,7 @@ def stacked_lstm_net(input_dim,
if __name__ == '__main__':
    # init
-   paddle.init(use_gpu=True, trainer_count=4)
+   paddle.init(use_gpu=False, trainer_count=4)

    # network config
    print 'load dictionary...'
@@ -143,10 +138,10 @@ if __name__ == '__main__':
                sys.stdout.flush()
        if isinstance(event, paddle.event.EndPass):
            result = trainer.test(
-               reader=paddle.reader.batched(
+               reader=paddle.batch(
                    lambda: paddle.dataset.imdb.test(word_dict),
                    batch_size=128),
-               reader_dict={'word': 0,
+               feeding={'word': 0,
                             'label': 1})
            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
@@ -156,11 +151,11 @@ if __name__ == '__main__':
                                 update_equation=adam_optimizer)

    trainer.train(
-       reader=paddle.reader.batched(
+       reader=paddle.batch(
            paddle.reader.shuffle(
                lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000),
            batch_size=100),
        event_handler=event_handler,
-       reader_dict={'word': 0,
+       feeding={'word': 0,
                     'label': 1},
        num_passes=10)
-import os
import paddle.v2 as paddle
-from seqToseq_net_v2 import seqToseq_net_v2
-
-# Data Definiation.
-# TODO:This code should be merged to dataset package.
-data_dir = "./data/pre-wmt14"
-src_lang_dict = os.path.join(data_dir, 'src.dict')
-trg_lang_dict = os.path.join(data_dir, 'trg.dict')
-source_dict_dim = len(open(src_lang_dict, "r").readlines())
-target_dict_dim = len(open(trg_lang_dict, "r").readlines())
-
-def read_to_dict(dict_path):
-    with open(dict_path, "r") as fin:
-        out_dict = {
-            line.strip(): line_count
-            for line_count, line in enumerate(fin)
-        }
-    return out_dict
-
-src_dict = read_to_dict(src_lang_dict)
-trg_dict = read_to_dict(trg_lang_dict)
-
-train_list = os.path.join(data_dir, 'train.list')
-test_list = os.path.join(data_dir, 'test.list')
-
-UNK_IDX = 2
-START = "<s>"
-END = "<e>"
-
-def _get_ids(s, dictionary):
-    words = s.strip().split()
-    return [dictionary[START]] + \
-           [dictionary.get(w, UNK_IDX) for w in words] + \
-           [dictionary[END]]
-
-def train_reader(file_name):
-    def reader():
-        with open(file_name, 'r') as f:
-            for line_count, line in enumerate(f):
-                line_split = line.strip().split('\t')
-                if len(line_split) != 2:
-                    continue
-                src_seq = line_split[0]  # one source sequence
-                src_ids = _get_ids(src_seq, src_dict)
-
-                trg_seq = line_split[1]  # one target sequence
-                trg_words = trg_seq.split()
-                trg_ids = [trg_dict.get(w, UNK_IDX) for w in trg_words]
-
-                # remove sequence whose length > 80 in training mode
-                if len(src_ids) > 80 or len(trg_ids) > 80:
-                    continue
-                trg_ids_next = trg_ids + [trg_dict[END]]
-                trg_ids = [trg_dict[START]] + trg_ids
-
-                yield src_ids, trg_ids, trg_ids_next
-
-    return reader
+
+def seqToseq_net(source_dict_dim, target_dict_dim):
+    ### Network Architecture
+    word_vector_dim = 512  # dimension of word vector
+    decoder_size = 512  # dimension of hidden unit in GRU Decoder network
+    encoder_size = 512  # dimension of hidden unit in GRU Encoder network
+
+    #### Encoder
+    src_word_id = paddle.layer.data(
+        name='source_language_word',
+        type=paddle.data_type.integer_value_sequence(source_dict_dim))
+    src_embedding = paddle.layer.embedding(
+        input=src_word_id,
+        size=word_vector_dim,
+        param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
+    src_forward = paddle.networks.simple_gru(
+        input=src_embedding, size=encoder_size)
+    src_backward = paddle.networks.simple_gru(
+        input=src_embedding, size=encoder_size, reverse=True)
+    encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])
+
+    #### Decoder
+    with paddle.layer.mixed(size=decoder_size) as encoded_proj:
+        encoded_proj += paddle.layer.full_matrix_projection(
+            input=encoded_vector)
+
+    backward_first = paddle.layer.first_seq(input=src_backward)
+
+    with paddle.layer.mixed(
+            size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot:
+        decoder_boot += paddle.layer.full_matrix_projection(
+            input=backward_first)
+
+    def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
+        decoder_mem = paddle.layer.memory(
+            name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
+
+        context = paddle.networks.simple_attention(
+            encoded_sequence=enc_vec,
+            encoded_proj=enc_proj,
+            decoder_state=decoder_mem)
+
+        with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs:
+            decoder_inputs += paddle.layer.full_matrix_projection(input=context)
+            decoder_inputs += paddle.layer.full_matrix_projection(
+                input=current_word)
+
+        gru_step = paddle.layer.gru_step(
+            name='gru_decoder',
+            input=decoder_inputs,
+            output_mem=decoder_mem,
+            size=decoder_size)
+
+        with paddle.layer.mixed(
+                size=target_dict_dim,
+                bias_attr=True,
+                act=paddle.activation.Softmax()) as out:
+            out += paddle.layer.full_matrix_projection(input=gru_step)
+        return out
+
+    decoder_group_name = "decoder_group"
+    group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True)
+    group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
+    group_inputs = [group_input1, group_input2]
+
+    trg_embedding = paddle.layer.embedding(
+        input=paddle.layer.data(
+            name='target_language_word',
+            type=paddle.data_type.integer_value_sequence(target_dict_dim)),
+        size=word_vector_dim,
+        param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
+    group_inputs.append(trg_embedding)
+
+    # For decoder equipped with attention mechanism, in training,
+    # target embeding (the groudtruth) is the data input,
+    # while encoded source sequence is accessed to as an unbounded memory.
+    # Here, the StaticInput defines a read-only memory
+    # for the recurrent_group.
+    decoder = paddle.layer.recurrent_group(
+        name=decoder_group_name,
+        step=gru_decoder_with_attention,
+        input=group_inputs)
+
+    lbl = paddle.layer.data(
+        name='target_language_next_word',
+        type=paddle.data_type.integer_value_sequence(target_dict_dim))
+    cost = paddle.layer.classification_cost(input=decoder, label=lbl)
+    return cost
def main():
    paddle.init(use_gpu=False, trainer_count=1)

+   # source and target dict dim.
+   dict_size = 30000
+   source_dict_dim = target_dict_dim = dict_size
+
    # define network topology
-   cost = seqToseq_net_v2(source_dict_dim, target_dict_dim)
+   cost = seqToseq_net(source_dict_dim, target_dict_dim)
    parameters = paddle.parameters.create(cost)

    # define optimize method and trainer
@@ -80,15 +110,15 @@ def main():
        update_equation=optimizer)

    # define data reader
-   reader_dict = {
+   feeding = {
        'source_language_word': 0,
        'target_language_word': 1,
        'target_language_next_word': 2
    }

-   wmt14_reader = paddle.reader.batched(
+   wmt14_reader = paddle.batch(
        paddle.reader.shuffle(
-           train_reader("data/pre-wmt14/train/train"), buf_size=8192),
+           paddle.dataset.wmt14.train(dict_size=dict_size), buf_size=8192),
        batch_size=5)

    # define event_handler callback
@@ -103,7 +133,7 @@ def main():
        reader=wmt14_reader,
        event_handler=event_handler,
        num_passes=10000,
-       reader_dict=reader_dict)
+       feeding=feeding)

if __name__ == '__main__':
......
import paddle.v2 as paddle
def seqToseq_net_v2(source_dict_dim, target_dict_dim):
### Network Architecture
word_vector_dim = 512 # dimension of word vector
decoder_size = 512 # dimension of hidden unit in GRU Decoder network
encoder_size = 512 # dimension of hidden unit in GRU Encoder network
#### Encoder
src_word_id = paddle.layer.data(
name='source_language_word',
type=paddle.data_type.integer_value_sequence(source_dict_dim))
src_embedding = paddle.layer.embedding(
input=src_word_id,
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
src_forward = paddle.networks.simple_gru(
input=src_embedding, size=encoder_size)
src_backward = paddle.networks.simple_gru(
input=src_embedding, size=encoder_size, reverse=True)
encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])
#### Decoder
with paddle.layer.mixed(size=decoder_size) as encoded_proj:
encoded_proj += paddle.layer.full_matrix_projection(
input=encoded_vector)
backward_first = paddle.layer.first_seq(input=src_backward)
with paddle.layer.mixed(
size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot:
decoder_boot += paddle.layer.full_matrix_projection(
input=backward_first)
def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
decoder_mem = paddle.layer.memory(
name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
context = paddle.networks.simple_attention(
encoded_sequence=enc_vec,
encoded_proj=enc_proj,
decoder_state=decoder_mem)
with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs:
decoder_inputs += paddle.layer.full_matrix_projection(input=context)
decoder_inputs += paddle.layer.full_matrix_projection(
input=current_word)
gru_step = paddle.layer.gru_step(
name='gru_decoder',
input=decoder_inputs,
output_mem=decoder_mem,
size=decoder_size)
with paddle.layer.mixed(
size=target_dict_dim,
bias_attr=True,
act=paddle.activation.Softmax()) as out:
out += paddle.layer.full_matrix_projection(input=gru_step)
return out
decoder_group_name = "decoder_group"
group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True)
group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
group_inputs = [group_input1, group_input2]
trg_embedding = paddle.layer.embedding(
input=paddle.layer.data(
name='target_language_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim)),
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For decoder equipped with attention mechanism, in training,
# target embeding (the groudtruth) is the data input,
# while encoded source sequence is accessed to as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
decoder = paddle.layer.recurrent_group(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs)
lbl = paddle.layer.data(
name='target_language_next_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim))
cost = paddle.layer.classification_cost(input=decoder, label=lbl)
return cost
@@ -39,6 +39,7 @@ register_unary_math_op('abs', act.AbsActivation())
register_unary_math_op('sigmoid', act.SigmoidActivation())
register_unary_math_op('tanh', act.TanhActivation())
register_unary_math_op('square', act.SquareActivation())
+register_unary_math_op('relu', act.ReluActivation())

def add(layeroutput, other):
......
@@ -7,8 +7,9 @@ x = layer_math.exp(x)
x = layer_math.log(x)
x = layer_math.abs(x)
x = layer_math.sigmoid(x)
+x = layer_math.tanh(x)
x = layer_math.square(x)
-x = layer_math.square(x)
+x = layer_math.relu(x)
y = 1 + x
y = y + 1
y = x + y
......
@@ -65,13 +65,28 @@ layers {
    }
  }
}
+layers {
+  name: "__tanh_0__"
+  type: "mixed"
+  size: 100
+  active_type: "tanh"
+  inputs {
+    input_layer_name: "__sigmoid_0__"
+    proj_conf {
+      type: "identity"
+      name: "___tanh_0__.w0"
+      input_size: 100
+      output_size: 100
+    }
+  }
+}
layers {
  name: "__square_0__"
  type: "mixed"
  size: 100
  active_type: "square"
  inputs {
-    input_layer_name: "__sigmoid_0__"
+    input_layer_name: "__tanh_0__"
    proj_conf {
      type: "identity"
      name: "___square_0__.w0"
@@ -81,15 +96,15 @@ layers {
    }
  }
}
layers {
-  name: "__square_1__"
+  name: "__relu_0__"
  type: "mixed"
  size: 100
-  active_type: "square"
+  active_type: "relu"
  inputs {
    input_layer_name: "__square_0__"
    proj_conf {
      type: "identity"
-      name: "___square_1__.w0"
+      name: "___relu_0__.w0"
      input_size: 100
      output_size: 100
    }
@@ -101,7 +116,7 @@ layers {
  size: 100
  active_type: ""
  inputs {
-    input_layer_name: "__square_1__"
+    input_layer_name: "__relu_0__"
  }
  slope: 1.0
  intercept: 1
@@ -123,7 +138,7 @@ layers {
  size: 100
  active_type: ""
  inputs {
-    input_layer_name: "__square_1__"
+    input_layer_name: "__relu_0__"
    proj_conf {
      type: "identity"
      name: "___mixed_0__.w0"
@@ -147,7 +162,7 @@ layers {
  size: 100
  active_type: ""
  inputs {
-    input_layer_name: "__square_1__"
+    input_layer_name: "__relu_0__"
  }
  slope: -1.0
  intercept: 0.0
@@ -339,8 +354,9 @@ sub_models {
  layer_names: "__log_0__"
  layer_names: "__abs_0__"
  layer_names: "__sigmoid_0__"
+  layer_names: "__tanh_0__"
  layer_names: "__square_0__"
-  layer_names: "__square_1__"
+  layer_names: "__relu_0__"
  layer_names: "__slope_intercept_layer_0__"
  layer_names: "__slope_intercept_layer_1__"
  layer_names: "__mixed_0__"
......
@@ -14,11 +14,18 @@
from py_paddle import DataProviderConverter
-import data_type
+import paddle.trainer.PyDataProvider2 as pydp2

__all__ = ['DataFeeder']

+def default_feeding_map(data_types):
+    reader_dict = dict()
+    for i, tp in enumerate(data_types):
+        reader_dict[tp[0]] = i
+    return reader_dict
+

class DataFeeder(DataProviderConverter):
    """
    DataFeeder converts the data returned by paddle.reader into a data structure
@@ -60,16 +67,21 @@ class DataFeeder(DataProviderConverter):
    :type data_types: list
    :param reader_dict: A dictionary to specify the position of each data
                        in the input data.
-   :type reader_dict: dict
+   :type feeding: dict
    """

-   def __init__(self, data_types, reader_dict):
+   def __init__(self, data_types, feeding=None):
        self.input_names = []
        input_types = []
-       self.reader_dict = reader_dict
+       if feeding is None:
+           feeding = default_feeding_map(data_types)
+       self.feeding = feeding
        for each in data_types:
            self.input_names.append(each[0])
-           assert isinstance(each[1], data_type.InputType)
+           if not isinstance(each[1], pydp2.InputType):
+               raise TypeError("second item in each data_type should be an "
+                               "InputType")
            input_types.append(each[1])
        DataProviderConverter.__init__(self, input_types)
@@ -90,7 +102,7 @@ class DataFeeder(DataProviderConverter):
        for each in data:
            reorder = []
            for name in self.input_names:
-               reorder.append(each[self.reader_dict[name]])
+               reorder.append(each[self.feeding[name]])
            retv.append(reorder)
        return retv
......
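The hunk above makes the mapping argument optional: when `feeding` is omitted, `default_feeding_map` assumes the reader yields fields in the same order as `data_types`. A minimal standalone sketch of the converter, assuming a built py_paddle; the four-dimensional `x` input, the label type, and the sample tuples are made up purely for illustration:

import paddle.v2 as paddle
from paddle.v2.data_feeder import DataFeeder

paddle.init(use_gpu=False, trainer_count=1)

# (name, InputType) pairs in the order the network declares its data layers
data_types = [('x', paddle.data_type.dense_vector(4)),
              ('label', paddle.data_type.integer_value(2))]

# the reader below yields (label, x) tuples, so map each name to its position;
# passing feeding=None instead would assume the reader order matches data_types
feeder = DataFeeder(data_types, feeding={'x': 1, 'label': 0})

minibatch = [(0, [0.1, 0.2, 0.3, 0.4]),
             (1, [0.5, 0.6, 0.7, 0.8])]
arguments = feeder(minibatch)  # swig Arguments, ready for a gradient machine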
@@ -14,129 +14,92 @@
"""
wmt14 dataset
"""
-import paddle.v2.dataset.common
import tarfile
-import os.path
-import itertools
+
+import paddle.v2.dataset.common

__all__ = ['train', 'test', 'build_dict']

URL_DEV_TEST = 'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz'
MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5'
-URL_TRAIN = 'http://localhost:8000/train.tgz'
-MD5_TRAIN = '72de99da2830ea5a3a2c4eb36092bbc7'
-
-def word_count(f, word_freq=None):
-    add = paddle.v2.dataset.common.dict_add
-    if word_freq == None:
-        word_freq = {}
-
-    for l in f:
-        for w in l.strip().split():
-            add(word_freq, w)
-        add(word_freq, '<s>')
-        add(word_freq, '<e>')
-
-    return word_freq
-
-def get_word_dix(word_freq):
-    TYPO_FREQ = 50
-    word_freq = filter(lambda x: x[1] > TYPO_FREQ, word_freq.items())
-    word_freq_sorted = sorted(word_freq, key=lambda x: (-x[1], x[0]))
-    words, _ = list(zip(*word_freq_sorted))
-    word_idx = dict(zip(words, xrange(len(words))))
-    word_idx['<unk>'] = len(words)
-    return word_idx
-
-def get_word_freq(train, dev):
-    word_freq = word_count(train, word_count(dev))
-    if '<unk>' in word_freq:
-        # remove <unk> for now, since we will set it as last index
-        del word_freq['<unk>']
-    return word_freq
-
-def build_dict():
-    base_dir = './wmt14-data'
-    train_en_filename = base_dir + '/train/train.en'
-    train_fr_filename = base_dir + '/train/train.fr'
-    dev_en_filename = base_dir + '/dev/ntst1213.en'
-    dev_fr_filename = base_dir + '/dev/ntst1213.fr'
-    if not os.path.exists(train_en_filename) or not os.path.exists(
-            train_fr_filename):
-        with tarfile.open(
-                paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14',
-                                                  MD5_TRAIN)) as tf:
-            tf.extractall(base_dir)
-    if not os.path.exists(dev_en_filename) or not os.path.exists(
-            dev_fr_filename):
-        with tarfile.open(
-                paddle.v2.dataset.common.download(URL_DEV_TEST, 'wmt14',
-                                                  MD5_DEV_TEST)) as tf:
-            tf.extractall(base_dir)
-    f_en = open(train_en_filename)
-    f_fr = open(train_fr_filename)
-    f_en_dev = open(dev_en_filename)
-    f_fr_dev = open(dev_fr_filename)
-    word_freq_en = get_word_freq(f_en, f_en_dev)
-    word_freq_fr = get_word_freq(f_fr, f_fr_dev)
-    f_en.close()
-    f_fr.close()
-    f_en_dev.close()
-    f_fr_dev.close()
-    return get_word_dix(word_freq_en), get_word_dix(word_freq_fr)
-
-def reader_creator(directory, path_en, path_fr, URL, MD5, dict_en, dict_fr):
-    def reader():
-        if not os.path.exists(path_en) or not os.path.exists(path_fr):
-            with tarfile.open(
-                    paddle.v2.dataset.common.download(URL, 'wmt14', MD5)) as tf:
-                tf.extractall(directory)
-
-        f_en = open(path_en)
-        f_fr = open(path_fr)
-        UNK_en = dict_en['<unk>']
-        UNK_fr = dict_fr['<unk>']
-
-        for en, fr in itertools.izip(f_en, f_fr):
-            src_ids = [dict_en.get(w, UNK_en) for w in en.strip().split()]
-            tar_ids = [
-                dict_fr.get(w, UNK_fr)
-                for w in ['<s>'] + fr.strip().split() + ['<e>']
-            ]
-
-            # remove sequence whose length > 80 in training mode
-            if len(src_ids) == 0 or len(tar_ids) <= 1 or len(
-                    src_ids) > 80 or len(tar_ids) > 80:
-                continue
-
-            yield src_ids, tar_ids[:-1], tar_ids[1:]
-
-        f_en.close()
-        f_fr.close()
-
-    return reader
-
-def train(dict_en, dict_fr):
-    directory = './wmt14-data'
-    return reader_creator(directory, directory + '/train/train.en',
-                          directory + '/train/train.fr', URL_TRAIN, MD5_TRAIN,
-                          dict_en, dict_fr)
-
-def test(dict_en, dict_fr):
-    directory = './wmt14-data'
-    return reader_creator(directory, directory + '/dev/ntst1213.en',
-                          directory + '/dev/ntst1213.fr', URL_DEV_TEST,
-                          MD5_DEV_TEST, dict_en, dict_fr)
+# this is a small set of data for test. The original data is too large and will be add later.
+URL_TRAIN = 'http://paddlepaddle.bj.bcebos.com/demo/wmt_shrinked_data/wmt14.tgz'
+MD5_TRAIN = 'a755315dd01c2c35bde29a744ede23a6'
+
+START = "<s>"
+END = "<e>"
+UNK = "<unk>"
+UNK_IDX = 2
+
+def __read_to_dict__(tar_file, dict_size):
+    def __to_dict__(fd, size):
+        out_dict = dict()
+        for line_count, line in enumerate(fd):
+            if line_count < size:
+                out_dict[line.strip()] = line_count
+            else:
+                break
+        return out_dict
+
+    with tarfile.open(tar_file, mode='r') as f:
+        names = [
+            each_item.name for each_item in f
+            if each_item.name.endswith("src.dict")
+        ]
+        assert len(names) == 1
+        src_dict = __to_dict__(f.extractfile(names[0]), dict_size)
+        names = [
+            each_item.name for each_item in f
+            if each_item.name.endswith("trg.dict")
+        ]
+        assert len(names) == 1
+        trg_dict = __to_dict__(f.extractfile(names[0]), dict_size)
+    return src_dict, trg_dict
+
+def reader_creator(tar_file, file_name, dict_size):
+    def reader():
+        src_dict, trg_dict = __read_to_dict__(tar_file, dict_size)
+        with tarfile.open(tar_file, mode='r') as f:
+            names = [
+                each_item.name for each_item in f
+                if each_item.name.endswith(file_name)
+            ]
+            for name in names:
+                for line in f.extractfile(name):
+                    line_split = line.strip().split('\t')
+                    if len(line_split) != 2:
+                        continue
+                    src_seq = line_split[0]  # one source sequence
+                    src_words = src_seq.split()
+                    src_ids = [
+                        src_dict.get(w, UNK_IDX)
+                        for w in [START] + src_words + [END]
+                    ]
+
+                    trg_seq = line_split[1]  # one target sequence
+                    trg_words = trg_seq.split()
+                    trg_ids = [trg_dict.get(w, UNK_IDX) for w in trg_words]
+
+                    # remove sequence whose length > 80 in training mode
+                    if len(src_ids) > 80 or len(trg_ids) > 80:
+                        continue
+                    trg_ids_next = trg_ids + [trg_dict[END]]
+                    trg_ids = [trg_dict[START]] + trg_ids
+
+                    yield src_ids, trg_ids, trg_ids_next
+
+    return reader
+
+def train(dict_size):
+    return reader_creator(
+        paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN),
+        'train/train', dict_size)
+
+def test(dict_size):
+    return reader_creator(
+        paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN),
+        'test/test', dict_size)
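With the rewrite above, the wmt14 readers no longer take pre-built dictionaries: they download the shrunk corpus themselves and cap both vocabularies at `dict_size`. A minimal usage sketch of the new interface (the batch size and the inspection loop are illustrative only):

import paddle.v2 as paddle

dict_size = 30000
train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.wmt14.train(dict_size), buf_size=8192),
    batch_size=5)

# every sample is (src_ids, trg_ids, trg_ids_next); sequences longer than
# 80 tokens were already dropped inside reader_creator above
for batch in train_reader():
    src_ids, trg_ids, trg_ids_next = batch[0]
    break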
@@ -21,13 +21,8 @@ class Inference(object):
        self.__gradient_machine__ = gm
        self.__data_types__ = topo.data_type()

-   def iter_infer(self,
-                  input=None,
-                  batch_size=None,
-                  reader=None,
-                  reader_dict=None):
-       if reader_dict is None:
-           reader_dict = self.default_reader_dict()
+   def iter_infer(self, input=None, batch_size=None, reader=None,
+                  feeding=None):
        if reader is None:
            assert input is not None and isinstance(input, collections.Iterable)
@@ -51,7 +46,7 @@ class Inference(object):
            raise ValueError("User should set either input or reader, "
                             "should not set them both.")
-       feeder = DataFeeder(self.__data_types__, reader_dict)
+       feeder = DataFeeder(self.__data_types__, feeding)
        self.__gradient_machine__.start()
        for data_batch in reader():
            yield self.__gradient_machine__.forwardTest(feeder(data_batch))
@@ -74,19 +69,13 @@ class Inference(object):
        else:
            return retv

-   def default_reader_dict(self):
-       reader_dict = dict()
-       for i, tp in enumerate(self.__data_types__):
-           reader_dict[tp[0]] = i
-       return reader_dict
-

def infer(output,
          parameters,
          input=None,
          batch_size=None,
          reader=None,
-         reader_dict=None,
+         feeding=None,
          field='value'):
    """
    Infer a neural network by given neural network output and parameters. The
@@ -113,7 +102,7 @@ def infer(output,
    :param reader: input data reader creator in batch. If this field is set, the
                   `input` and `batch_size` will be ignored.
    :type reader: callable
-   :param reader_dict: Reader dictionary. Default could generate from input
+   :param feeding: Reader dictionary. Default could generate from input
                        value.
    :param field: The prediction field. It should in [`value`, `ids`]. `value`
                  means return the prediction probabilities, `ids` means return
@@ -129,4 +118,4 @@ def infer(output,
        input=input,
        batch_size=batch_size,
        reader=reader,
-       reader_dict=reader_dict)
+       feeding=feeding)
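The renamed `feeding` argument reaches `paddle.infer` as well. A self-contained sketch of the call shape under these assumptions: the tiny two-class network and the sample tuples below are invented purely for illustration, and the randomly initialized parameters stand in for trained ones:

import paddle.v2 as paddle

paddle.init(use_gpu=False, trainer_count=1)

x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(4))
predict = paddle.layer.fc(input=x, size=2, act=paddle.activation.Softmax())
parameters = paddle.parameters.create(predict)

# `input` is a list of samples; each sample is a tuple, and `feeding` maps the
# data layer name to the index of its field inside that tuple
samples = [([0.1, 0.2, 0.3, 0.4], ), ([0.5, 0.6, 0.7, 0.8], )]
probs = paddle.infer(
    output=predict,
    parameters=parameters,
    input=samples,
    feeding={'x': 0},
    field='value')  # 'value' returns probabilities, 'ids' returns label ids
print probs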
import numpy as np
import py_paddle.swig_paddle as api
from paddle.proto.ParameterConfig_pb2 import ParameterConfig
+import struct
+import tarfile
+import cStringIO
from topology import Topology

__all__ = ['Parameters', 'create']
@@ -122,6 +124,12 @@ class Parameters(object):
        if len(self.__gradient_machines__) == 0:
            # create new parameter in python numpy.
+           if len(self.__tmp_params__) != 0:
+               ret_list = [
+                   mat for name, mat in self.__tmp_params__ if name == key
+               ]
+               if len(ret_list) == 1:
+                   return ret_list[0]
            return np.ndarray(shape=shape, dtype=np.float32)
        else:
            for each_gradient_machine in self.__gradient_machines__:
@@ -228,6 +236,67 @@ class Parameters(object):
        self.__gradient_machines__.append(gradient_machine)
def serialize(self, name, f):
"""
:param name:
:param f:
:type f: file
:return:
"""
param = self.get(name)
size = reduce(lambda a, b: a * b, param.shape)
f.write(struct.pack("IIQ", 0, 4, size))
param = param.astype(np.float32)
f.write(param.tobytes())
def deserialize(self, name, f):
"""
:param name:
:param f:
:type f: file
:return:
"""
f.read(16) # header
arr = np.frombuffer(f.read(), dtype=np.float32)
self.set(name, arr.reshape(self.get_shape(name)))
def to_tar(self, f):
tar = tarfile.TarFile(fileobj=f, mode='w')
for nm in self.names():
buf = cStringIO.StringIO()
self.serialize(nm, buf)
tarinfo = tarfile.TarInfo(name=nm)
buf.seek(0)
tarinfo.size = len(buf.getvalue())
tar.addfile(tarinfo, buf)
conf = self.__param_conf__[nm]
confStr = conf.SerializeToString()
tarinfo = tarfile.TarInfo(name="%s.protobuf" % nm)
tarinfo.size = len(confStr)
buf = cStringIO.StringIO(confStr)
buf.seek(0)
tar.addfile(tarinfo, fileobj=buf)
@staticmethod
def from_tar(f):
params = Parameters()
tar = tarfile.TarFile(fileobj=f, mode='r')
for finfo in tar:
assert isinstance(finfo, tarfile.TarInfo)
if finfo.name.endswith('.protobuf'):
f = tar.extractfile(finfo)
conf = ParameterConfig()
conf.ParseFromString(f.read())
params.__append_config__(conf)
for param_name in params.names():
f = tar.extractfile(param_name)
params.deserialize(param_name, f)
return params
def __get_parameter_in_gradient_machine__(gradient_machine, name):
    """
......
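The new to_tar/from_tar methods give Parameters a simple on-disk format: for each parameter, one tar member holding its raw float32 bytes behind a 16-byte header (struct "IIQ": version, element size, element count) plus a `<name>.protobuf` member holding its ParameterConfig. A round-trip sketch mirroring what the mnist demo above does with gzip; the small network exists only to create something to save:

import gzip
import paddle.v2 as paddle

paddle.init(use_gpu=False, trainer_count=1)

x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(8))
y = paddle.layer.fc(input=x, size=2, act=paddle.activation.Softmax())
parameters = paddle.parameters.create(y)

# write every parameter (and its ParameterConfig) into a gzipped tar archive
with gzip.open('params.tar.gz', 'w') as f:
    parameters.to_tar(f)

# restore them later, e.g. to resume training or to pass to paddle.infer
with gzip.open('params.tar.gz', 'r') as f:
    loaded = paddle.parameters.Parameters.from_tar(f)

print loaded.names()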
@@ -22,7 +22,7 @@ cd $SCRIPTPATH
$1 -m pip install ../../../../paddle/dist/*.whl

-test_list="test_data_feeder.py"
+test_list="test_data_feeder.py test_parameters.py"

export PYTHONPATH=$PWD/../../../../python/
......
import unittest
import sys
try:
import py_paddle
del py_paddle
except ImportError:
print >> sys.stderr, "It seems swig of Paddle is not installed, this " \
"unittest will not be run."
sys.exit(0)
import paddle.v2.parameters as parameters
from paddle.proto.ParameterConfig_pb2 import ParameterConfig
import random
import cStringIO
import numpy
def __rand_param_config__(name):
conf = ParameterConfig()
conf.name = name
size = 1
for i in xrange(2):
dim = random.randint(1, 1000)
conf.dims.append(dim)
size *= dim
conf.size = size
assert conf.IsInitialized()
return conf
class TestParameters(unittest.TestCase):
def test_serialization(self):
params = parameters.Parameters()
params.__append_config__(__rand_param_config__("param_0"))
params.__append_config__(__rand_param_config__("param_1"))
for name in params.names():
param = params.get(name)
param[:] = numpy.random.uniform(
-1.0, 1.0, size=params.get_shape(name))
params.set(name, param)
tmp_file = cStringIO.StringIO()
params.to_tar(tmp_file)
tmp_file.seek(0)
params_dup = parameters.Parameters.from_tar(tmp_file)
self.assertEqual(params_dup.names(), params.names())
for name in params.names():
self.assertEqual(params.get_shape(name), params_dup.get_shape(name))
p0 = params.get(name)
p1 = params_dup.get(name)
self.assertTrue(numpy.isclose(p0, p1).all())
if __name__ == '__main__':
unittest.main()
@@ -57,11 +57,11 @@ class SGD(object):
            self.__topology_in_proto__, api.CREATE_MODE_NORMAL,
            self.__optimizer__.enable_types())
        assert isinstance(gm, api.GradientMachine)
-       parameters.append_gradient_machine(gm)
        self.__gradient_machine__ = gm
        self.__gradient_machine__.randParameters()
+       parameters.append_gradient_machine(gm)

-   def train(self, reader, num_passes=1, event_handler=None, reader_dict=None):
+   def train(self, reader, num_passes=1, event_handler=None, feeding=None):
        """
        Training method. Will train num_passes of input data.
@@ -70,14 +70,13 @@ class SGD(object):
        :param event_handler: Event handler. A method will be invoked when event
                              occurred.
        :type event_handler: (BaseEvent) => None
+       :param feeding: Feeding is a map of neural network input name and array
+                       index that reader returns.
+       :type feeding: dict
        :return:
        """
        if event_handler is None:
            event_handler = default_event_handler
-       if reader_dict is None:
-           reader_dict = self.default_reader_dict()
        __check_train_args__(**locals())

        updater = self.__optimizer__.create_local_updater()
@@ -89,9 +88,7 @@ class SGD(object):
        pass_evaluator = self.__gradient_machine__.makeEvaluator()
        assert isinstance(pass_evaluator, api.Evaluator)
        out_args = api.Arguments.createArguments(0)
-       feeder = DataFeeder(self.__data_types__, reader_dict)
+       feeder = DataFeeder(self.__data_types__, feeding)
        for pass_id in xrange(num_passes):
            event_handler(v2_event.BeginPass(pass_id))
            pass_evaluator.start()
@@ -125,17 +122,8 @@ class SGD(object):
            event_handler(v2_event.EndPass(pass_id, evaluator=pass_evaluator))
        self.__gradient_machine__.finish()

-   def default_reader_dict(self):
-       reader_dict = dict()
-       for i, tp in enumerate(self.__data_types__):
-           reader_dict[tp[0]] = i
-       return reader_dict
-
-   def test(self, reader, reader_dict=None):
-       if reader_dict is None:
-           reader_dict = self.default_reader_dict()
-
-       feeder = DataFeeder(self.__data_types__, reader_dict)
+   def test(self, reader, feeding=None):
+       feeder = DataFeeder(self.__data_types__, feeding)
        evaluator = self.__gradient_machine__.makeEvaluator()
        out_args = api.Arguments.createArguments(0)
        evaluator.start()
......
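Putting the renamed argument together with the trainer: `feeding` is passed straight to DataFeeder, so `train` and `test` only need it when the reader's field order differs from the order of the network's data layers. A toy end-to-end sketch; the random two-class reader and the optimizer settings are invented for illustration:

import random
import paddle.v2 as paddle

paddle.init(use_gpu=False, trainer_count=1)

x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(4))
label = paddle.layer.data(name='label', type=paddle.data_type.integer_value(2))
predict = paddle.layer.fc(input=x, size=2, act=paddle.activation.Softmax())
cost = paddle.layer.classification_cost(input=predict, label=label)

parameters = paddle.parameters.create(cost)
optimizer = paddle.optimizer.Momentum(momentum=0.9, learning_rate=1e-3)
trainer = paddle.trainer.SGD(
    cost=cost, parameters=parameters, update_equation=optimizer)

def toy_reader():
    for _ in xrange(64):
        features = [random.random() for _ in xrange(4)]
        yield int(sum(features) > 2.0), features  # (label, x)

def event_handler(event):
    if isinstance(event, paddle.event.EndPass):
        print "finished pass %d" % event.pass_id

trainer.train(
    reader=paddle.batch(toy_reader, batch_size=8),
    feeding={'x': 1, 'label': 0},  # reader yields (label, x)
    event_handler=event_handler,
    num_passes=2)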