提交 047f3a76 编写于 作者: Y Yancey1989

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into develop

[submodule "book"]
path = book
url = https://github.com/PaddlePaddle/book.git
...@@ -2,12 +2,12 @@ ...@@ -2,12 +2,12 @@
sha: c25201a00e6b0514370501050cf2a8538ac12270 sha: c25201a00e6b0514370501050cf2a8538ac12270
hooks: hooks:
- id: remove-crlf - id: remove-crlf
files: (?!.*third_party)^.*$ files: (?!.*third_party)^.*$ | (?!.*book)^.*$
- repo: https://github.com/reyoung/mirrors-yapf.git - repo: https://github.com/reyoung/mirrors-yapf.git
sha: v0.13.2 sha: v0.13.2
hooks: hooks:
- id: yapf - id: yapf
files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$ # Bazel BUILD files follow Python syntax. files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
- repo: https://github.com/pre-commit/pre-commit-hooks - repo: https://github.com/pre-commit/pre-commit-hooks
sha: 7539d8bd1a00a3c1bfd34cdb606d3a6372e83469 sha: 7539d8bd1a00a3c1bfd34cdb606d3a6372e83469
hooks: hooks:
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
- id: check-merge-conflict - id: check-merge-conflict
- id: check-symlinks - id: check-symlinks
- id: detect-private-key - id: detect-private-key
files: (?!.*third_party)^.*$ files: (?!.*third_party)^.*$ | (?!.*book)^.*$
- id: end-of-file-fixer - id: end-of-file-fixer
- repo: https://github.com/PaddlePaddle/clang-format-pre-commit-hook.git - repo: https://github.com/PaddlePaddle/clang-format-pre-commit-hook.git
sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29 sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29
......
...@@ -29,13 +29,16 @@ Luo, Tao ...@@ -29,13 +29,16 @@ Luo, Tao
Lyu, Qin Lyu, Qin
Mao, Hongyue Mao, Hongyue
Qian, Xiaojun Qian, Xiaojun
Qiao, Longfei
Qi, Jun Qi, Jun
Qin, Duohao Qin, Duohao
Shen, Guolong Shen, Guolong
Shi, Guangchuan Shi, Guangchuan
Song, Xiang Song, Xiang
Wang, Helin
Wang, Jiang Wang, Jiang
Wang, Yanfei Wang, Yanfei
Wang, Yi
Wang, Yong Wang, Yong
Weng, Renliang Weng, Renliang
Xu, Tianbing Xu, Tianbing
......
Subproject commit 22ed2a01aee872f055b5f5f212428f481cefc10d
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
INCLUDE(ExternalProject) INCLUDE(ExternalProject)
FIND_PACKAGE(Protobuf) FIND_PACKAGE(Protobuf 3.1)
IF(NOT PROTOBUF_FOUND) IF(NOT PROTOBUF_FOUND)
SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf) SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf)
......
...@@ -13,9 +13,10 @@ ...@@ -13,9 +13,10 @@
# limitations under the License # limitations under the License
import sys import sys
import paddle.v2 as paddle import paddle.v2 as paddle
from api_v2_vgg import vgg_bn_drop from api_v2_vgg import vgg_bn_drop
from api_v2_resnet import resnet_cifar10
def main(): def main():
...@@ -23,16 +24,16 @@ def main(): ...@@ -23,16 +24,16 @@ def main():
classdim = 10 classdim = 10
# PaddlePaddle init # PaddlePaddle init
paddle.init(use_gpu=True, trainer_count=1) paddle.init(use_gpu=False, trainer_count=1)
image = paddle.layer.data( image = paddle.layer.data(
name="image", type=paddle.data_type.dense_vector(datadim)) name="image", type=paddle.data_type.dense_vector(datadim))
# Add neural network config # Add neural network config
# option 1. resnet # option 1. resnet
net = resnet_cifar10(image, depth=32) # net = resnet_cifar10(image, depth=32)
# option 2. vgg # option 2. vgg
# net = vgg_bn_drop(image) net = vgg_bn_drop(image)
out = paddle.layer.fc(input=net, out = paddle.layer.fc(input=net,
size=classdim, size=classdim,
...@@ -68,7 +69,7 @@ def main(): ...@@ -68,7 +69,7 @@ def main():
result = trainer.test( result = trainer.test(
reader=paddle.batch( reader=paddle.batch(
paddle.dataset.cifar.test10(), batch_size=128), paddle.dataset.cifar.test10(), batch_size=128),
reader_dict={'image': 0, feeding={'image': 0,
'label': 1}) 'label': 1})
print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
...@@ -83,7 +84,7 @@ def main(): ...@@ -83,7 +84,7 @@ def main():
batch_size=128), batch_size=128),
num_passes=5, num_passes=5,
event_handler=event_handler, event_handler=event_handler,
reader_dict={'image': 0, feeding={'image': 0,
'label': 1}) 'label': 1})
......
...@@ -30,25 +30,25 @@ def main(): ...@@ -30,25 +30,25 @@ def main():
def event_handler(event): def event_handler(event):
if isinstance(event, paddle.event.EndIteration): if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0: if event.batch_id % 100 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % ( print "Pass %d, Batch %d, Cost %f" % (
event.pass_id, event.batch_id, event.cost, event.metrics) event.pass_id, event.batch_id, event.cost)
if isinstance(event, paddle.event.EndPass): if isinstance(event, paddle.event.EndPass):
if (event.pass_id + 1) % 10 == 0:
result = trainer.test( result = trainer.test(
reader=paddle.reader.batched( reader=paddle.batch(
uci_housing.test(), batch_size=2), uci_housing.test(), batch_size=2),
reader_dict={'x': 0, feeding={'x': 0,
'y': 1}) 'y': 1})
if event.pass_id % 10 == 0: print "Test %d, %.2f" % (event.pass_id, result.cost)
print "Test %d, %s" % (event.pass_id, result.metrics)
# training # training
trainer.train( trainer.train(
reader=paddle.reader.batched( reader=paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
uci_housing.train(), buf_size=500), uci_housing.train(), buf_size=500),
batch_size=2), batch_size=2),
reader_dict={'x': 0, feeding={'x': 0,
'y': 1}, 'y': 1},
event_handler=event_handler, event_handler=event_handler,
num_passes=30) num_passes=30)
......
...@@ -5,3 +5,6 @@ plot.png ...@@ -5,3 +5,6 @@ plot.png
train.log train.log
*pyc *pyc
.ipynb_checkpoints .ipynb_checkpoints
params.pkl
params.tar
params.tar.gz
import paddle.v2 as paddle import paddle.v2 as paddle
import gzip
def softmax_regression(img): def softmax_regression(img):
...@@ -71,6 +72,10 @@ def main(): ...@@ -71,6 +72,10 @@ def main():
cost = paddle.layer.classification_cost(input=predict, label=label) cost = paddle.layer.classification_cost(input=predict, label=label)
try:
with gzip.open('params.tar.gz', 'r') as f:
parameters = paddle.parameters.Parameters.from_tar(f)
except IOError:
parameters = paddle.parameters.create(cost) parameters = paddle.parameters.create(cost)
optimizer = paddle.optimizer.Momentum( optimizer = paddle.optimizer.Momentum(
...@@ -86,11 +91,15 @@ def main(): ...@@ -86,11 +91,15 @@ def main():
def event_handler(event): def event_handler(event):
if isinstance(event, paddle.event.EndIteration): if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0: if event.batch_id % 1000 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % ( print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics) event.pass_id, event.batch_id, event.cost, event.metrics)
if isinstance(event, paddle.event.EndPass):
result = trainer.test(reader=paddle.reader.batched( with gzip.open('params.tar.gz', 'w') as f:
parameters.to_tar(f)
elif isinstance(event, paddle.event.EndPass):
result = trainer.test(reader=paddle.batch(
paddle.dataset.mnist.test(), batch_size=128)) paddle.dataset.mnist.test(), batch_size=128))
print "Test with Pass %d, Cost %f, %s\n" % ( print "Test with Pass %d, Cost %f, %s\n" % (
event.pass_id, result.cost, result.metrics) event.pass_id, result.cost, result.metrics)
...@@ -110,17 +119,16 @@ def main(): ...@@ -110,17 +119,16 @@ def main():
print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1]) print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1])
print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100) print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100)
test_creator = paddle.dataset.mnist.test()
test_data = []
for item in test_creator():
test_data.append(item[0])
if len(test_data) == 100:
break
# output is a softmax layer. It returns probabilities. # output is a softmax layer. It returns probabilities.
# Shape should be (100, 10) # Shape should be (100, 10)
probs = paddle.infer( probs = paddle.infer(output=predict, parameters=parameters, input=test_data)
output=predict,
parameters=parameters,
reader=paddle.batch(
paddle.reader.firstn(
paddle.reader.map_readers(lambda item: (item[0], ),
paddle.dataset.mnist.test()),
n=100),
batch_size=32))
print probs.shape print probs.shape
......
import paddle.v2 as paddle
import cPickle
import copy
def main():
paddle.init(use_gpu=False)
movie_title_dict = paddle.dataset.movielens.get_movie_title_dict()
uid = paddle.layer.data(
name='user_id',
type=paddle.data_type.integer_value(
paddle.dataset.movielens.max_user_id() + 1))
usr_emb = paddle.layer.embedding(input=uid, size=32)
usr_gender_id = paddle.layer.data(
name='gender_id', type=paddle.data_type.integer_value(2))
usr_gender_emb = paddle.layer.embedding(input=usr_gender_id, size=16)
usr_age_id = paddle.layer.data(
name='age_id',
type=paddle.data_type.integer_value(
len(paddle.dataset.movielens.age_table)))
usr_age_emb = paddle.layer.embedding(input=usr_age_id, size=16)
usr_job_id = paddle.layer.data(
name='job_id',
type=paddle.data_type.integer_value(paddle.dataset.movielens.max_job_id(
) + 1))
usr_job_emb = paddle.layer.embedding(input=usr_job_id, size=16)
usr_combined_features = paddle.layer.fc(
input=[usr_emb, usr_gender_emb, usr_age_emb, usr_job_emb],
size=200,
act=paddle.activation.Tanh())
mov_id = paddle.layer.data(
name='movie_id',
type=paddle.data_type.integer_value(
paddle.dataset.movielens.max_movie_id() + 1))
mov_emb = paddle.layer.embedding(input=mov_id, size=32)
mov_categories = paddle.layer.data(
name='category_id',
type=paddle.data_type.sparse_binary_vector(
len(paddle.dataset.movielens.movie_categories())))
mov_categories_hidden = paddle.layer.fc(input=mov_categories, size=32)
mov_title_id = paddle.layer.data(
name='movie_title',
type=paddle.data_type.integer_value_sequence(len(movie_title_dict)))
mov_title_emb = paddle.layer.embedding(input=mov_title_id, size=32)
mov_title_conv = paddle.networks.sequence_conv_pool(
input=mov_title_emb, hidden_size=32, context_len=3)
mov_combined_features = paddle.layer.fc(
input=[mov_emb, mov_categories_hidden, mov_title_conv],
size=200,
act=paddle.activation.Tanh())
inference = paddle.layer.cos_sim(
a=usr_combined_features, b=mov_combined_features, size=1, scale=5)
cost = paddle.layer.regression_cost(
input=inference,
label=paddle.layer.data(
name='score', type=paddle.data_type.dense_vector(1)))
parameters = paddle.parameters.create(cost)
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=paddle.optimizer.Adam(
learning_rate=1e-4))
feeding = {
'user_id': 0,
'gender_id': 1,
'age_id': 2,
'job_id': 3,
'movie_id': 4,
'category_id': 5,
'movie_title': 6,
'score': 7
}
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d Batch %d Cost %.2f" % (
event.pass_id, event.batch_id, event.cost)
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
paddle.dataset.movielens.train(), buf_size=8192),
batch_size=256),
event_handler=event_handler,
feeding=feeding,
num_passes=1)
user_id = 234
movie_id = 345
user = paddle.dataset.movielens.user_info()[user_id]
movie = paddle.dataset.movielens.movie_info()[movie_id]
feature = user.value() + movie.value()
def reader():
yield feature
infer_dict = copy.copy(feeding)
del infer_dict['score']
prediction = paddle.infer(
output=inference,
parameters=parameters,
reader=paddle.batch(
reader, batch_size=32),
feeding=infer_dict)
print(prediction + 5) / 2
if __name__ == '__main__':
main()
...@@ -163,11 +163,11 @@ def main(): ...@@ -163,11 +163,11 @@ def main():
update_equation=optimizer) update_equation=optimizer)
parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32)) parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))
trn_reader = paddle.reader.batched( trn_reader = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
conll05.test(), buf_size=8192), batch_size=10) conll05.test(), buf_size=8192), batch_size=10)
reader_dict = { feeding = {
'word_data': 0, 'word_data': 0,
'ctx_n2_data': 1, 'ctx_n2_data': 1,
'ctx_n1_data': 2, 'ctx_n1_data': 2,
...@@ -183,7 +183,7 @@ def main(): ...@@ -183,7 +183,7 @@ def main():
reader=trn_reader, reader=trn_reader,
event_handler=event_handler, event_handler=event_handler,
num_passes=10000, num_passes=10000,
reader_dict=reader_dict) feeding=feeding)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -18,11 +18,7 @@ from paddle.trainer_config_helpers.poolings import MaxPooling ...@@ -18,11 +18,7 @@ from paddle.trainer_config_helpers.poolings import MaxPooling
import paddle.v2 as paddle import paddle.v2 as paddle
def convolution_net(input_dim, def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128):
class_dim=2,
emb_dim=128,
hid_dim=128,
is_predict=False):
data = paddle.layer.data("word", data = paddle.layer.data("word",
paddle.data_type.integer_value_sequence(input_dim)) paddle.data_type.integer_value_sequence(input_dim))
emb = paddle.layer.embedding(input=data, size=emb_dim) emb = paddle.layer.embedding(input=data, size=emb_dim)
...@@ -42,8 +38,7 @@ def stacked_lstm_net(input_dim, ...@@ -42,8 +38,7 @@ def stacked_lstm_net(input_dim,
class_dim=2, class_dim=2,
emb_dim=128, emb_dim=128,
hid_dim=512, hid_dim=512,
stacked_num=3, stacked_num=3):
is_predict=False):
""" """
A Wrapper for sentiment classification task. A Wrapper for sentiment classification task.
This network uses bi-directional recurrent network, This network uses bi-directional recurrent network,
...@@ -110,7 +105,7 @@ def stacked_lstm_net(input_dim, ...@@ -110,7 +105,7 @@ def stacked_lstm_net(input_dim,
if __name__ == '__main__': if __name__ == '__main__':
# init # init
paddle.init(use_gpu=True, trainer_count=4) paddle.init(use_gpu=False, trainer_count=4)
# network config # network config
print 'load dictionary...' print 'load dictionary...'
...@@ -143,10 +138,10 @@ if __name__ == '__main__': ...@@ -143,10 +138,10 @@ if __name__ == '__main__':
sys.stdout.flush() sys.stdout.flush()
if isinstance(event, paddle.event.EndPass): if isinstance(event, paddle.event.EndPass):
result = trainer.test( result = trainer.test(
reader=paddle.reader.batched( reader=paddle.batch(
lambda: paddle.dataset.imdb.test(word_dict), lambda: paddle.dataset.imdb.test(word_dict),
batch_size=128), batch_size=128),
reader_dict={'word': 0, feeding={'word': 0,
'label': 1}) 'label': 1})
print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
...@@ -156,11 +151,11 @@ if __name__ == '__main__': ...@@ -156,11 +151,11 @@ if __name__ == '__main__':
update_equation=adam_optimizer) update_equation=adam_optimizer)
trainer.train( trainer.train(
reader=paddle.reader.batched( reader=paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000), lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000),
batch_size=100), batch_size=100),
event_handler=event_handler, event_handler=event_handler,
reader_dict={'word': 0, feeding={'word': 0,
'label': 1}, 'label': 1},
num_passes=10) num_passes=10)
import os
import paddle.v2 as paddle import paddle.v2 as paddle
from seqToseq_net_v2 import seqToseq_net_v2
# Data Definiation.
# TODO:This code should be merged to dataset package.
data_dir = "./data/pre-wmt14"
src_lang_dict = os.path.join(data_dir, 'src.dict')
trg_lang_dict = os.path.join(data_dir, 'trg.dict')
source_dict_dim = len(open(src_lang_dict, "r").readlines())
target_dict_dim = len(open(trg_lang_dict, "r").readlines())
def read_to_dict(dict_path):
with open(dict_path, "r") as fin:
out_dict = {
line.strip(): line_count
for line_count, line in enumerate(fin)
}
return out_dict
src_dict = read_to_dict(src_lang_dict)
trg_dict = read_to_dict(trg_lang_dict)
train_list = os.path.join(data_dir, 'train.list')
test_list = os.path.join(data_dir, 'test.list')
UNK_IDX = 2
START = "<s>"
END = "<e>"
def _get_ids(s, dictionary):
words = s.strip().split()
return [dictionary[START]] + \
[dictionary.get(w, UNK_IDX) for w in words] + \
[dictionary[END]]
def train_reader(file_name):
def reader():
with open(file_name, 'r') as f:
for line_count, line in enumerate(f):
line_split = line.strip().split('\t')
if len(line_split) != 2:
continue
src_seq = line_split[0] # one source sequence
src_ids = _get_ids(src_seq, src_dict)
trg_seq = line_split[1] # one target sequence
trg_words = trg_seq.split()
trg_ids = [trg_dict.get(w, UNK_IDX) for w in trg_words]
# remove sequence whose length > 80 in training mode
if len(src_ids) > 80 or len(trg_ids) > 80:
continue
trg_ids_next = trg_ids + [trg_dict[END]]
trg_ids = [trg_dict[START]] + trg_ids
yield src_ids, trg_ids, trg_ids_next def seqToseq_net(source_dict_dim, target_dict_dim):
### Network Architecture
return reader word_vector_dim = 512 # dimension of word vector
decoder_size = 512 # dimension of hidden unit in GRU Decoder network
encoder_size = 512 # dimension of hidden unit in GRU Encoder network
#### Encoder
src_word_id = paddle.layer.data(
name='source_language_word',
type=paddle.data_type.integer_value_sequence(source_dict_dim))
src_embedding = paddle.layer.embedding(
input=src_word_id,
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
src_forward = paddle.networks.simple_gru(
input=src_embedding, size=encoder_size)
src_backward = paddle.networks.simple_gru(
input=src_embedding, size=encoder_size, reverse=True)
encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])
#### Decoder
with paddle.layer.mixed(size=decoder_size) as encoded_proj:
encoded_proj += paddle.layer.full_matrix_projection(
input=encoded_vector)
backward_first = paddle.layer.first_seq(input=src_backward)
with paddle.layer.mixed(
size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot:
decoder_boot += paddle.layer.full_matrix_projection(
input=backward_first)
def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
decoder_mem = paddle.layer.memory(
name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
context = paddle.networks.simple_attention(
encoded_sequence=enc_vec,
encoded_proj=enc_proj,
decoder_state=decoder_mem)
with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs:
decoder_inputs += paddle.layer.full_matrix_projection(input=context)
decoder_inputs += paddle.layer.full_matrix_projection(
input=current_word)
gru_step = paddle.layer.gru_step(
name='gru_decoder',
input=decoder_inputs,
output_mem=decoder_mem,
size=decoder_size)
with paddle.layer.mixed(
size=target_dict_dim,
bias_attr=True,
act=paddle.activation.Softmax()) as out:
out += paddle.layer.full_matrix_projection(input=gru_step)
return out
decoder_group_name = "decoder_group"
group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True)
group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
group_inputs = [group_input1, group_input2]
trg_embedding = paddle.layer.embedding(
input=paddle.layer.data(
name='target_language_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim)),
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For decoder equipped with attention mechanism, in training,
# target embeding (the groudtruth) is the data input,
# while encoded source sequence is accessed to as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
decoder = paddle.layer.recurrent_group(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs)
lbl = paddle.layer.data(
name='target_language_next_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim))
cost = paddle.layer.classification_cost(input=decoder, label=lbl)
return cost
def main(): def main():
paddle.init(use_gpu=False, trainer_count=1) paddle.init(use_gpu=False, trainer_count=1)
# source and target dict dim.
dict_size = 30000
source_dict_dim = target_dict_dim = dict_size
# define network topology # define network topology
cost = seqToseq_net_v2(source_dict_dim, target_dict_dim) cost = seqToseq_net(source_dict_dim, target_dict_dim)
parameters = paddle.parameters.create(cost) parameters = paddle.parameters.create(cost)
optimizer = paddle.optimizer.Adam(learning_rate=1e-4)
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 10 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
# define optimize method and trainer
optimizer = paddle.optimizer.Adam(learning_rate=1e-4)
trainer = paddle.trainer.SGD(cost=cost, trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters, parameters=parameters,
update_equation=optimizer) update_equation=optimizer)
reader_dict = { # define data reader
feeding = {
'source_language_word': 0, 'source_language_word': 0,
'target_language_word': 1, 'target_language_word': 1,
'target_language_next_word': 2 'target_language_next_word': 2
} }
trn_reader = paddle.reader.batched( wmt14_reader = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
train_reader("data/pre-wmt14/train/train"), buf_size=8192), paddle.dataset.wmt14.train(dict_size=dict_size), buf_size=8192),
batch_size=5) batch_size=5)
# define event_handler callback
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 10 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
# start to train
trainer.train( trainer.train(
reader=trn_reader, reader=wmt14_reader,
event_handler=event_handler, event_handler=event_handler,
num_passes=10000, num_passes=10000,
reader_dict=reader_dict) feeding=feeding)
if __name__ == '__main__': if __name__ == '__main__':
......
import paddle.v2.activation as activation
import paddle.v2.attr as attr
import paddle.v2.data_type as data_type
import paddle.v2.layer as layer
import paddle.v2.networks as networks
def seqToseq_net_v2(source_dict_dim, target_dict_dim):
### Network Architecture
word_vector_dim = 512 # dimension of word vector
decoder_size = 512 # dimension of hidden unit in GRU Decoder network
encoder_size = 512 # dimension of hidden unit in GRU Encoder network
#### Encoder
src_word_id = layer.data(
name='source_language_word',
type=data_type.integer_value_sequence(source_dict_dim))
src_embedding = layer.embedding(
input=src_word_id,
size=word_vector_dim,
param_attr=attr.ParamAttr(name='_source_language_embedding'))
src_forward = networks.simple_gru(input=src_embedding, size=encoder_size)
src_backward = networks.simple_gru(
input=src_embedding, size=encoder_size, reverse=True)
encoded_vector = layer.concat(input=[src_forward, src_backward])
#### Decoder
with layer.mixed(size=decoder_size) as encoded_proj:
encoded_proj += layer.full_matrix_projection(input=encoded_vector)
backward_first = layer.first_seq(input=src_backward)
with layer.mixed(size=decoder_size, act=activation.Tanh()) as decoder_boot:
decoder_boot += layer.full_matrix_projection(input=backward_first)
def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
decoder_mem = layer.memory(
name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
context = networks.simple_attention(
encoded_sequence=enc_vec,
encoded_proj=enc_proj,
decoder_state=decoder_mem)
with layer.mixed(size=decoder_size * 3) as decoder_inputs:
decoder_inputs += layer.full_matrix_projection(input=context)
decoder_inputs += layer.full_matrix_projection(input=current_word)
gru_step = layer.gru_step(
name='gru_decoder',
input=decoder_inputs,
output_mem=decoder_mem,
size=decoder_size)
with layer.mixed(
size=target_dict_dim, bias_attr=True,
act=activation.Softmax()) as out:
out += layer.full_matrix_projection(input=gru_step)
return out
decoder_group_name = "decoder_group"
group_input1 = layer.StaticInputV2(input=encoded_vector, is_seq=True)
group_input2 = layer.StaticInputV2(input=encoded_proj, is_seq=True)
group_inputs = [group_input1, group_input2]
trg_embedding = layer.embedding(
input=layer.data(
name='target_language_word',
type=data_type.integer_value_sequence(target_dict_dim)),
size=word_vector_dim,
param_attr=attr.ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For decoder equipped with attention mechanism, in training,
# target embeding (the groudtruth) is the data input,
# while encoded source sequence is accessed to as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
decoder = layer.recurrent_group(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs)
lbl = layer.data(
name='target_language_next_word',
type=data_type.integer_value_sequence(target_dict_dim))
cost = layer.classification_cost(input=decoder, label=lbl)
return cost
import math
import paddle.v2 as paddle
dictsize = 1953
embsize = 32
hiddensize = 256
N = 5
def wordemb(inlayer):
wordemb = paddle.layer.table_projection(
input=inlayer,
size=embsize,
param_attr=paddle.attr.Param(
name="_proj",
initial_std=0.001,
learning_rate=1,
l2_rate=0, ))
return wordemb
def main():
paddle.init(use_gpu=False, trainer_count=1)
word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict)
firstword = paddle.layer.data(
name="firstw", type=paddle.data_type.integer_value(dict_size))
secondword = paddle.layer.data(
name="secondw", type=paddle.data_type.integer_value(dict_size))
thirdword = paddle.layer.data(
name="thirdw", type=paddle.data_type.integer_value(dict_size))
fourthword = paddle.layer.data(
name="fourthw", type=paddle.data_type.integer_value(dict_size))
nextword = paddle.layer.data(
name="fifthw", type=paddle.data_type.integer_value(dict_size))
Efirst = wordemb(firstword)
Esecond = wordemb(secondword)
Ethird = wordemb(thirdword)
Efourth = wordemb(fourthword)
contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
hidden1 = paddle.layer.fc(input=contextemb,
size=hiddensize,
act=paddle.activation.Sigmoid(),
layer_attr=paddle.attr.Extra(drop_rate=0.5),
bias_attr=paddle.attr.Param(learning_rate=2),
param_attr=paddle.attr.Param(
initial_std=1. / math.sqrt(embsize * 8),
learning_rate=1))
predictword = paddle.layer.fc(input=hidden1,
size=dict_size,
bias_attr=paddle.attr.Param(learning_rate=2),
act=paddle.activation.Softmax())
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
result = trainer.test(
paddle.batch(
paddle.dataset.imikolov.test(word_dict, N), 32))
print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics,
result.metrics)
cost = paddle.layer.classification_cost(input=predictword, label=nextword)
parameters = paddle.parameters.create(cost)
adam_optimizer = paddle.optimizer.Adam(
learning_rate=3e-3,
regularization=paddle.optimizer.L2Regularization(8e-4))
trainer = paddle.trainer.SGD(cost, parameters, adam_optimizer)
trainer.train(
paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32),
num_passes=30,
event_handler=event_handler)
if __name__ == '__main__':
main()
API API
=== ===
模型配置 API
------------
.. toctree::
:maxdepth: 1
v2/model_configs.rst
数据 API
--------
.. toctree::
:maxdepth: 1
v2/data.rst
训练 API
--------
.. toctree::
:maxdepth: 1
v2/run_logic.rst
\ No newline at end of file
...@@ -8,3 +8,19 @@ Model Config API ...@@ -8,3 +8,19 @@ Model Config API
:maxdepth: 1 :maxdepth: 1
v2/model_configs.rst v2/model_configs.rst
Data API
--------
.. toctree::
:maxdepth: 1
v2/data.rst
Train API
---------
.. toctree::
:maxdepth: 1
v2/run_logic.rst
\ No newline at end of file
================
Data Related API
================
#########
DataTypes
#########
.. automodule:: paddle.v2.data_type
:members:
##########
DataFeeder
##########
.. automodule:: paddle.v2.data_feeder
:members:
######
Reader
######
.. automodule:: paddle.v2.reader
:members:
.. automodule:: paddle.v2.reader.creator
:members:
#########
minibatch
#########
.. automodule:: paddle.v2.minibatch
:members:
#######
Dataset
#######
.. automodule:: paddle.v2.dataset
:members:
mnist
+++++
.. automodule:: paddle.v2.dataset.mnist
:members:
cifar
+++++
.. automodule:: paddle.v2.dataset.cifar
:members:
conll05
+++++++
.. automodule:: paddle.v2.dataset.conll05
:members:
imdb
++++
.. automodule:: paddle.v2.dataset.imdb
:members:
imikolov
++++++++
.. automodule:: paddle.v2.dataset.imikolov
:members:
movielens
+++++++++
.. automodule:: paddle.v2.dataset.movielens
:members:
sentiment
+++++++++
.. automodule:: paddle.v2.dataset.sentiment
:members:
uci_housing
+++++++++++
.. automodule:: paddle.v2.dataset.uci_housing
:members:
#########################
Configuration Related API
#########################
====== ======
Layers Layers
====== ======
.. automodule:: paddle.v2.layer .. automodule:: paddle.v2.layer
:members: :members:
==========
Attributes
==========
.. automodule:: paddle.v2.attr
:members:
===========
Activations
===========
.. automodule:: paddle.v2.activation
:members:
========
Poolings
========
.. automodule:: paddle.v2.pooling
:members:
========
Networks
========
.. automodule:: paddle.v2.networks
:members:
==========
Optimizers
==========
.. automodule:: paddle.v2.optimizer
:members:
###########
Trainer API
###########
==========
Parameters
==========
.. automodule:: paddle.v2.parameters
:members:
=======
Trainer
=======
.. automodule:: paddle.v2.trainer
:members:
=====
Event
=====
.. automodule:: paddle.v2.event
:members:
=========
Inference
=========
.. autofunction:: paddle.v2.infer
\ No newline at end of file
...@@ -31,7 +31,7 @@ def reader_creator_random_image(width, height): ...@@ -31,7 +31,7 @@ def reader_creator_random_image(width, height):
An example implementation for multiple item data reader creator: An example implementation for multiple item data reader creator:
```python ```python
def reader_creator_random_imageand_label(widht, height, label): def reader_creator_random_image_and_label(width, height, label):
def reader(): def reader():
while True: while True:
yield numpy.random.uniform(-1, 1, size=width*height), label yield numpy.random.uniform(-1, 1, size=width*height), label
......
...@@ -346,8 +346,10 @@ Evaluator* MultiGradientMachine::makeEvaluator() const { ...@@ -346,8 +346,10 @@ Evaluator* MultiGradientMachine::makeEvaluator() const {
void MultiGradientMachine::eval(Evaluator* evaluator) const { void MultiGradientMachine::eval(Evaluator* evaluator) const {
for (auto& thread : threads_) { for (auto& thread : threads_) {
SetDevice device(thread->getDeviceId()); SetDevice device(thread->getDeviceId());
if (thread->hasInputData()) {
thread->getGradientMachine()->eval(evaluator); thread->getGradientMachine()->eval(evaluator);
} }
}
} }
void MultiGradientMachine::getOutArgs(std::vector<Argument>* outArgs, void MultiGradientMachine::getOutArgs(std::vector<Argument>* outArgs,
...@@ -356,14 +358,19 @@ void MultiGradientMachine::getOutArgs(std::vector<Argument>* outArgs, ...@@ -356,14 +358,19 @@ void MultiGradientMachine::getOutArgs(std::vector<Argument>* outArgs,
REGISTER_TIMER("waitOutArgs"); REGISTER_TIMER("waitOutArgs");
thread->waitOutArgsReady(); thread->waitOutArgsReady();
} }
outArgs_.resize(threads_[0]->getOutArgs().size());
outArgs_.resize(threads_[threads_.size() - 1]->getOutArgs().size());
REGISTER_TIMER("copyOutArgs"); REGISTER_TIMER("copyOutArgs");
for (size_t i = 0; i < outArgs_.size(); ++i) { for (size_t i = 0; i < outArgs_.size(); ++i) {
std::vector<Argument> args; std::vector<Argument> args;
args.reserve(threads_.size()); args.reserve(threads_.size());
for (auto& thread : threads_) { for (auto& thread : threads_) {
args.push_back(thread->getOutArgs()[i]); // If the thread input is empty, then the output is empty.
auto tmp = thread->getOutArgs();
if (tmp.size() > 0) {
args.push_back(tmp[i]);
}
} }
outArgs_[i].concat(args, useGpu_, outArgStream_, passType); outArgs_[i].concat(args, useGpu_, outArgStream_, passType);
} }
...@@ -534,7 +541,7 @@ void TrainerThread::prefetch() { ...@@ -534,7 +541,7 @@ void TrainerThread::prefetch() {
void TrainerThread::forward() { void TrainerThread::forward() {
if (!inArgsCopied_) { if (!inArgsCopied_) {
REGISTER_TIMER("copyInArgs"); REGISTER_TIMER("copyInArgs");
copyInArgs(); batchSize_ = copyInArgs();
} else { } else {
inArgsCopied_ = false; inArgsCopied_ = false;
} }
...@@ -564,7 +571,12 @@ void TrainerThread::forward() { ...@@ -564,7 +571,12 @@ void TrainerThread::forward() {
{ {
REGISTER_TIMER("thread_forward"); REGISTER_TIMER("thread_forward");
gradientMachine_->forward(inArgs_, &outArgs_, multiMachine_->getPassType()); if (batchSize_ > 0) {
gradientMachine_->forward(
inArgs_, &outArgs_, multiMachine_->getPassType());
} else {
outArgs_.clear();
}
} }
outArgsReadySem_.post(); outArgsReadySem_.post();
} }
...@@ -574,7 +586,13 @@ void TrainerThread::backward() { ...@@ -574,7 +586,13 @@ void TrainerThread::backward() {
if (multiMachine_->isPassGrad()) { if (multiMachine_->isPassGrad()) {
copyOutputGrad(); copyOutputGrad();
} }
if (batchSize_ > 0) {
gradientMachine_->backward(backwardCallback_); gradientMachine_->backward(backwardCallback_);
} else {
for (size_t i = parameters_.size(); i > 0; i--) {
backwardCallback(parameters_[i - 1].get());
}
}
if (multiMachine_->hasNonstaticCpuParamters()) { if (multiMachine_->hasNonstaticCpuParamters()) {
mergeCpuGradients(); mergeCpuGradients();
} }
...@@ -732,7 +750,7 @@ void TrainerThread::notifyValueReady(int paramId) { ...@@ -732,7 +750,7 @@ void TrainerThread::notifyValueReady(int paramId) {
notifyValueDispatch(paramId); notifyValueDispatch(paramId);
} }
void TrainerThread::copyInArgs() { int TrainerThread::copyInArgs() {
const std::vector<Argument>& fullInArgs = multiMachine_->getInArgs(); const std::vector<Argument>& fullInArgs = multiMachine_->getInArgs();
int numThreads = multiMachine_->getAllThreads().size(); int numThreads = multiMachine_->getAllThreads().size();
int32_t numSequences = fullInArgs[0].getNumSequences(); int32_t numSequences = fullInArgs[0].getNumSequences();
...@@ -748,7 +766,7 @@ void TrainerThread::copyInArgs() { ...@@ -748,7 +766,7 @@ void TrainerThread::copyInArgs() {
} }
if (copySize == 0) { if (copySize == 0) {
return; return 0;
} }
for (size_t i = 0; i < fullInArgs.size(); i++) { for (size_t i = 0; i < fullInArgs.size(); i++) {
...@@ -758,6 +776,7 @@ void TrainerThread::copyInArgs() { ...@@ -758,6 +776,7 @@ void TrainerThread::copyInArgs() {
copySize, copySize,
FLAGS_parallel_nn ? false : multiMachine_->useGpu()); FLAGS_parallel_nn ? false : multiMachine_->useGpu());
} }
return copySize;
} }
void TrainerThread::mergeCpuGradients() { void TrainerThread::mergeCpuGradients() {
......
...@@ -387,6 +387,9 @@ public: ...@@ -387,6 +387,9 @@ public:
/// copy the output gradient from the main GradientMachine. /// copy the output gradient from the main GradientMachine.
void copyOutputGrad(); void copyOutputGrad();
/// Whether the thread has input data.
bool hasInputData() { return batchSize_ != 0; }
protected: protected:
void mergeCpuGradients(); void mergeCpuGradients();
...@@ -407,7 +410,7 @@ protected: ...@@ -407,7 +410,7 @@ protected:
void copyGradToBufferThread(); void copyGradToBufferThread();
void gradCollectThread(); void gradCollectThread();
void copyInArgs(); int copyInArgs();
void forward(); void forward();
void backward(); void backward();
void backwardCallback(Parameter* para); void backwardCallback(Parameter* para);
...@@ -467,6 +470,7 @@ protected: ...@@ -467,6 +470,7 @@ protected:
/// indicate whether inArgs is copied before forward() /// indicate whether inArgs is copied before forward()
bool inArgsCopied_; bool inArgsCopied_;
int batchSize_;
}; };
} // namespace paddle } // namespace paddle
...@@ -45,6 +45,23 @@ class CacheType(object): ...@@ -45,6 +45,23 @@ class CacheType(object):
class InputType(object): class InputType(object):
"""
InputType is the base class for paddle input types.
.. note::
this is a base class, and should never be used by user.
:param dim: dimension of input. If the input is an integer, it means the
value range. Otherwise, it means the size of layer.
:type dim: int
:param seq_type: sequence type of input. 0 means it is not a sequence. 1
means it is a variable length sequence. 2 means it is a
nested sequence.
:type seq_type: int
:param type: data type of input.
:type type: int
"""
__slots__ = ['dim', 'seq_type', 'type'] __slots__ = ['dim', 'seq_type', 'type']
def __init__(self, dim, seq_type, tp): def __init__(self, dim, seq_type, tp):
...@@ -54,20 +71,61 @@ class InputType(object): ...@@ -54,20 +71,61 @@ class InputType(object):
def dense_slot(dim, seq_type=SequenceType.NO_SEQUENCE): def dense_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
"""
Dense Vector. It means the input feature is dense float vector. For example,
if the input is an image with 28*28 pixels, the input of Paddle neural
network should be a dense vector with dimension 784.
:param dim: dimension of this vector.
:type dim: int
:param seq_type: sequence type of input.
:type seq_type: int
:return: An input type object.
:rtype: InputType
"""
return InputType(dim, seq_type, DataType.Dense) return InputType(dim, seq_type, DataType.Dense)
def sparse_non_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE): def sparse_non_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
"""
Sparse binary vector. It means the input feature is a sparse vector and the
every element in this vector is either zero or one.
:param dim: dimension of this vector.
:type dim: int
:param seq_type: sequence type of this input.
:type seq_type: int
:return: An input type object.
:rtype: InputType
"""
return InputType(dim, seq_type, DataType.SparseNonValue) return InputType(dim, seq_type, DataType.SparseNonValue)
def sparse_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE): def sparse_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
"""
Sparse vector. It means the input feature is a sparse vector. Most of the
elements in this vector are zero, others could be any float value.
:param dim: dimension of this vector.
:type dim: int
:param seq_type: sequence type of this input.
:type seq_type: int
:return: An input type object.
:rtype: InputType
"""
return InputType(dim, seq_type, DataType.SparseValue) return InputType(dim, seq_type, DataType.SparseValue)
def index_slot(value_range, seq_type=SequenceType.NO_SEQUENCE): def index_slot(value_range, seq_type=SequenceType.NO_SEQUENCE):
"""Data type of integer. """
Data type of integer.
:param seq_type: sequence type of this input.
:type seq_type: int
:param value_range: range of this integer. :param value_range: range of this integer.
:type value_range: int
:return: An input type object
:rtype: InputType
""" """
return InputType(value_range, seq_type, DataType.Index) return InputType(value_range, seq_type, DataType.Index)
...@@ -76,10 +134,17 @@ dense_vector = dense_slot ...@@ -76,10 +134,17 @@ dense_vector = dense_slot
sparse_binary_vector = sparse_non_value_slot sparse_binary_vector = sparse_non_value_slot
sparse_vector = sparse_value_slot sparse_vector = sparse_value_slot
integer_value = index_slot integer_value = index_slot
integer_value.__doc__ = index_slot.__doc__
def dense_vector_sequence(dim): def dense_vector_sequence(dim):
"""
Data type of a sequence of dense vector.
:param dim: dimension of dense vector.
:type dim: int
:return: An input type object
:rtype: InputType
"""
return dense_vector(dim, seq_type=SequenceType.SEQUENCE) return dense_vector(dim, seq_type=SequenceType.SEQUENCE)
...@@ -88,6 +153,15 @@ def dense_vector_sub_sequence(dim): ...@@ -88,6 +153,15 @@ def dense_vector_sub_sequence(dim):
def sparse_binary_vector_sequence(dim): def sparse_binary_vector_sequence(dim):
"""
Data type of a sequence of sparse vector, which every element is either zero
or one.
:param dim: dimension of sparse vector.
:type dim: int
:return: An input type object
:rtype: InputType
"""
return sparse_binary_vector(dim, seq_type=SequenceType.SEQUENCE) return sparse_binary_vector(dim, seq_type=SequenceType.SEQUENCE)
...@@ -96,6 +170,15 @@ def sparse_binary_vector_sub_sequence(dim): ...@@ -96,6 +170,15 @@ def sparse_binary_vector_sub_sequence(dim):
def sparse_vector_sequence(dim): def sparse_vector_sequence(dim):
"""
Data type of a sequence of sparse vector, which most elements are zero,
others could be any float value.
:param dim: dimension of sparse vector.
:type dim: int
:return: An input type object
:rtype: InputType
"""
return sparse_vector(dim, seq_type=SequenceType.SEQUENCE) return sparse_vector(dim, seq_type=SequenceType.SEQUENCE)
...@@ -104,8 +187,11 @@ def sparse_vector_sub_sequence(dim): ...@@ -104,8 +187,11 @@ def sparse_vector_sub_sequence(dim):
def integer_value_sequence(value_range): def integer_value_sequence(value_range):
"""Data type of a sequence of integer. """
Data type of a sequence of integer.
:param value_range: range of each element. :param value_range: range of each element.
:type value_range: int
""" """
return integer_value(value_range, seq_type=SequenceType.SEQUENCE) return integer_value(value_range, seq_type=SequenceType.SEQUENCE)
...@@ -115,7 +201,6 @@ def integer_value_sub_sequence(dim): ...@@ -115,7 +201,6 @@ def integer_value_sub_sequence(dim):
integer_sequence = integer_value_sequence integer_sequence = integer_value_sequence
integer_sequence.__doc__ = integer_value_sequence.__doc__
class SingleSlotWrapper(object): class SingleSlotWrapper(object):
......
...@@ -39,6 +39,7 @@ register_unary_math_op('abs', act.AbsActivation()) ...@@ -39,6 +39,7 @@ register_unary_math_op('abs', act.AbsActivation())
register_unary_math_op('sigmoid', act.SigmoidActivation()) register_unary_math_op('sigmoid', act.SigmoidActivation())
register_unary_math_op('tanh', act.TanhActivation()) register_unary_math_op('tanh', act.TanhActivation())
register_unary_math_op('square', act.SquareActivation()) register_unary_math_op('square', act.SquareActivation())
register_unary_math_op('relu', act.ReluActivation())
def add(layeroutput, other): def add(layeroutput, other):
......
...@@ -795,17 +795,16 @@ def data_layer(name, size, height=None, width=None, layer_attr=None): ...@@ -795,17 +795,16 @@ def data_layer(name, size, height=None, width=None, layer_attr=None):
.. code-block:: python .. code-block:: python
data = data_layer(name="input", data = data_layer(name="input", size=1000)
size=1000)
:param name: Name of this data layer. :param name: Name of this data layer.
:type name: basestring :type name: basestring
:param size: Size of this data layer. :param size: Size of this data layer.
:type size: int :type size: int
:param height: Height of this data layer, used for image :param height: Height of this data layer, used for image
:type size: int|None :type height: int|None
:param width: Width of this data layer, used for image :param width: Width of this data layer, used for image
:type size: int|None :type width: int|None
:param layer_attr: Extra Layer Attribute. :param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute. :type layer_attr: ExtraLayerAttribute.
:return: LayerOutput object. :return: LayerOutput object.
......
...@@ -7,8 +7,9 @@ x = layer_math.exp(x) ...@@ -7,8 +7,9 @@ x = layer_math.exp(x)
x = layer_math.log(x) x = layer_math.log(x)
x = layer_math.abs(x) x = layer_math.abs(x)
x = layer_math.sigmoid(x) x = layer_math.sigmoid(x)
x = layer_math.tanh(x)
x = layer_math.square(x) x = layer_math.square(x)
x = layer_math.square(x) x = layer_math.relu(x)
y = 1 + x y = 1 + x
y = y + 1 y = y + 1
y = x + y y = x + y
......
...@@ -65,13 +65,28 @@ layers { ...@@ -65,13 +65,28 @@ layers {
} }
} }
} }
layers {
name: "__tanh_0__"
type: "mixed"
size: 100
active_type: "tanh"
inputs {
input_layer_name: "__sigmoid_0__"
proj_conf {
type: "identity"
name: "___tanh_0__.w0"
input_size: 100
output_size: 100
}
}
}
layers { layers {
name: "__square_0__" name: "__square_0__"
type: "mixed" type: "mixed"
size: 100 size: 100
active_type: "square" active_type: "square"
inputs { inputs {
input_layer_name: "__sigmoid_0__" input_layer_name: "__tanh_0__"
proj_conf { proj_conf {
type: "identity" type: "identity"
name: "___square_0__.w0" name: "___square_0__.w0"
...@@ -81,15 +96,15 @@ layers { ...@@ -81,15 +96,15 @@ layers {
} }
} }
layers { layers {
name: "__square_1__" name: "__relu_0__"
type: "mixed" type: "mixed"
size: 100 size: 100
active_type: "square" active_type: "relu"
inputs { inputs {
input_layer_name: "__square_0__" input_layer_name: "__square_0__"
proj_conf { proj_conf {
type: "identity" type: "identity"
name: "___square_1__.w0" name: "___relu_0__.w0"
input_size: 100 input_size: 100
output_size: 100 output_size: 100
} }
...@@ -101,7 +116,7 @@ layers { ...@@ -101,7 +116,7 @@ layers {
size: 100 size: 100
active_type: "" active_type: ""
inputs { inputs {
input_layer_name: "__square_1__" input_layer_name: "__relu_0__"
} }
slope: 1.0 slope: 1.0
intercept: 1 intercept: 1
...@@ -123,7 +138,7 @@ layers { ...@@ -123,7 +138,7 @@ layers {
size: 100 size: 100
active_type: "" active_type: ""
inputs { inputs {
input_layer_name: "__square_1__" input_layer_name: "__relu_0__"
proj_conf { proj_conf {
type: "identity" type: "identity"
name: "___mixed_0__.w0" name: "___mixed_0__.w0"
...@@ -147,7 +162,7 @@ layers { ...@@ -147,7 +162,7 @@ layers {
size: 100 size: 100
active_type: "" active_type: ""
inputs { inputs {
input_layer_name: "__square_1__" input_layer_name: "__relu_0__"
} }
slope: -1.0 slope: -1.0
intercept: 0.0 intercept: 0.0
...@@ -339,8 +354,9 @@ sub_models { ...@@ -339,8 +354,9 @@ sub_models {
layer_names: "__log_0__" layer_names: "__log_0__"
layer_names: "__abs_0__" layer_names: "__abs_0__"
layer_names: "__sigmoid_0__" layer_names: "__sigmoid_0__"
layer_names: "__tanh_0__"
layer_names: "__square_0__" layer_names: "__square_0__"
layer_names: "__square_1__" layer_names: "__relu_0__"
layer_names: "__slope_intercept_layer_0__" layer_names: "__slope_intercept_layer_0__"
layer_names: "__slope_intercept_layer_1__" layer_names: "__slope_intercept_layer_1__"
layer_names: "__mixed_0__" layer_names: "__mixed_0__"
......
...@@ -12,26 +12,15 @@ ...@@ -12,26 +12,15 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from paddle.trainer_config_helpers.activations import * import paddle.trainer_config_helpers.activations
import copy
__all__ = [ __all__ = []
"Base", "Tanh", "Sigmoid", "Softmax", "Identity", "Linear",
'SequenceSoftmax', "Exp", "Relu", "BRelu", "SoftRelu", "STanh", "Abs",
"Square", "Log"
]
Base = BaseActivation suffix = 'Activation'
Tanh = TanhActivation for act in paddle.trainer_config_helpers.activations.__all__:
Sigmoid = SigmoidActivation new_name = act[:-len(suffix)]
Softmax = SoftmaxActivation globals()[new_name] = copy.copy(
SequenceSoftmax = SequenceSoftmaxActivation getattr(paddle.trainer_config_helpers.activations, act))
Identity = IdentityActivation globals()[new_name].__name__ = new_name
Linear = Identity __all__.append(new_name)
Relu = ReluActivation
BRelu = BReluActivation
SoftRelu = SoftReluActivation
STanh = STanhActivation
Abs = AbsActivation
Square = SquareActivation
Exp = ExpActivation
Log = LogActivation
...@@ -12,12 +12,16 @@ ...@@ -12,12 +12,16 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from paddle.trainer_config_helpers.attrs import * import paddle.trainer_config_helpers.attrs
__all__ = [ __all__ = [
"Param", "Param",
"Extra", "Extra",
] ]
Param = ParameterAttribute Param = paddle.trainer_config_helpers.attrs.ParameterAttribute
Extra = ExtraLayerAttribute Extra = paddle.trainer_config_helpers.attrs.ExtraLayerAttribute
for each in paddle.trainer_config_helpers.attrs.__all__:
globals()[each] = getattr(paddle.trainer_config_helpers.attrs, each)
__all__.append(each)
...@@ -13,12 +13,55 @@ ...@@ -13,12 +13,55 @@
# limitations under the License. # limitations under the License.
import collections import collections
import re
from paddle.trainer_config_helpers.default_decorators import wrap_name_default from paddle.trainer_config_helpers.default_decorators import wrap_name_default
import paddle.trainer_config_helpers as conf_helps import paddle.trainer_config_helpers as conf_helps
class LayerType(type):
def __new__(cls, name, bases, attrs):
method_name = attrs.get('METHOD_NAME', None)
if method_name is not None:
method = getattr(conf_helps, method_name)
if method.__doc__ is not None:
mapper = attrs.get("__map_docstr__", None)
if mapper is not None:
attrs['__doc__'] = LayerType.__map_docstr__(
mapper(method.__doc__),
method_name=method_name,
name=name)
else:
attrs['__doc__'] = LayerType.__map_docstr__(
method.__doc__, method_name=method_name, name=name)
return super(LayerType, cls).__new__(cls, name, bases, attrs)
@staticmethod
def __map_docstr__(doc, name, method_name):
assert isinstance(doc, basestring)
# replace LayerOutput to paddle.v2.config_base.Layer
doc = doc.replace("LayerOutput", "paddle.v2.config_base.Layer")
doc = doc.replace('ParameterAttribute',
'paddle.v2.attr.ParameterAttribute')
doc = re.sub(r'ExtraLayerAttribute[^\s]?',
'paddle.v2.attr.ExtraAttribute', doc)
# xxx_layer to xxx
doc = re.sub(r"(?P<name>[a-z]+)_layer", r"\g<name>", doc)
# XxxxActivation to paddle.v2.Activation.Xxxx
doc = re.sub(r"(?P<name>[A-Z][a-zA-Z]+)Activation",
r"paddle.v2.Activation.\g<name>", doc)
# TODO(yuyang18): Add more rules if needed.
return doc
class Layer(object): class Layer(object):
__metaclass__ = LayerType
def __init__(self, name=None, parent_layers=None): def __init__(self, name=None, parent_layers=None):
assert isinstance(parent_layers, dict) assert isinstance(parent_layers, dict)
self.name = name self.name = name
...@@ -80,6 +123,8 @@ def __convert_to_v2__(method_name, parent_names, is_default_name=True): ...@@ -80,6 +123,8 @@ def __convert_to_v2__(method_name, parent_names, is_default_name=True):
wrapper = None wrapper = None
class V2LayerImpl(Layer): class V2LayerImpl(Layer):
METHOD_NAME = method_name
def __init__(self, **kwargs): def __init__(self, **kwargs):
parent_layers = dict() parent_layers = dict()
other_kwargs = dict() other_kwargs = dict()
......
...@@ -12,13 +12,20 @@ ...@@ -12,13 +12,20 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from py_paddle import swig_paddle
from py_paddle import DataProviderConverter from py_paddle import DataProviderConverter
import data_type
import paddle.trainer.PyDataProvider2 as pydp2
__all__ = ['DataFeeder'] __all__ = ['DataFeeder']
def default_feeding_map(data_types):
reader_dict = dict()
for i, tp in enumerate(data_types):
reader_dict[tp[0]] = i
return reader_dict
class DataFeeder(DataProviderConverter): class DataFeeder(DataProviderConverter):
""" """
DataFeeder converts the data returned by paddle.reader into a data structure DataFeeder converts the data returned by paddle.reader into a data structure
...@@ -30,6 +37,9 @@ class DataFeeder(DataProviderConverter): ...@@ -30,6 +37,9 @@ class DataFeeder(DataProviderConverter):
The example usage: The example usage:
.. code-block:: python
data_types = [('image', paddle.data_type.dense_vector(784)), data_types = [('image', paddle.data_type.dense_vector(784)),
('label', paddle.data_type.integer_value(10))] ('label', paddle.data_type.integer_value(10))]
reader_dict = {'image':0, 'label':1} reader_dict = {'image':0, 'label':1}
...@@ -43,49 +53,51 @@ class DataFeeder(DataProviderConverter): ...@@ -43,49 +53,51 @@ class DataFeeder(DataProviderConverter):
# [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ] # second sample # [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ] # second sample
# ] # ]
arg = feeder(minibatch_data) arg = feeder(minibatch_data)
"""
def __init__(self, data_types, reader_dict): .. note::
"""
This module is for internal use only. Users should use the `reader`
interface.
:param data_types: A list to specify data name and type. Each item is :param data_types: A list to specify data name and type. Each item is
a tuple of (data_name, data_type). For example: a tuple of (data_name, data_type).
[('image', paddle.data_type.dense_vector(784)),
('label', paddle.data_type.integer_value(10))]
:type data_types: A list of tuple :type data_types: list
:param reader_dict: A dictionary to specify the position of each data :param reader_dict: A dictionary to specify the position of each data
in the input data. in the input data.
:type reader_dict: dict() :type feeding: dict
""" """
def __init__(self, data_types, feeding=None):
self.input_names = [] self.input_names = []
input_types = [] input_types = []
self.reader_dict = reader_dict if feeding is None:
feeding = default_feeding_map(data_types)
self.feeding = feeding
for each in data_types: for each in data_types:
self.input_names.append(each[0]) self.input_names.append(each[0])
assert isinstance(each[1], data_type.InputType) if not isinstance(each[1], pydp2.InputType):
raise TypeError("second item in each data_type should be an "
"InputType")
input_types.append(each[1]) input_types.append(each[1])
DataProviderConverter.__init__(self, input_types) DataProviderConverter.__init__(self, input_types)
def __len__(self):
return len(self.input_names)
def convert(self, dat, argument=None): def convert(self, dat, argument=None):
""" """
:param dat: A list of mini-batch data. Each sample is a list or tuple :param dat: A list of mini-batch data. Each sample is a list or tuple
one feature or multiple features. one feature or multiple features.
for example:
[
([0.2, 0.2], ), # first sample
([0.8, 0.3], ), # second sample
]
or,
[
[[0.2, 0.2], ], # first sample
[[0.8, 0.3], ], # second sample
]
:type dat: List :type dat: list
:param argument: An Arguments object contains this mini-batch data with :param argument: An Arguments object contains this mini-batch data with
one or multiple features. The Arguments definition is one or multiple features. The Arguments definition is
in the API. in the API.
:type argument: swig_paddle.Arguments :type argument: py_paddle.swig_paddle.Arguments
""" """
def reorder_data(data): def reorder_data(data):
...@@ -93,7 +105,7 @@ class DataFeeder(DataProviderConverter): ...@@ -93,7 +105,7 @@ class DataFeeder(DataProviderConverter):
for each in data: for each in data:
reorder = [] reorder = []
for name in self.input_names: for name in self.input_names:
reorder.append(each[self.reader_dict[name]]) reorder.append(each[self.feeding[name]])
retv.append(reorder) retv.append(reorder)
return retv return retv
......
...@@ -12,11 +12,15 @@ ...@@ -12,11 +12,15 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from paddle.trainer.PyDataProvider2 import \ import paddle.trainer.PyDataProvider2 as pydp2
InputType, DataType, dense_vector, sparse_binary_vector,\
sparse_vector, integer_value, integer_value_sequence
__all__ = [ import_list = [
'InputType', 'DataType', 'dense_vector', 'sparse_binary_vector', nm for nm in dir(pydp2)
'sparse_vector', 'integer_value', 'integer_value_sequence' if '_' in nm and nm[0] != '_' and ('value' in nm or 'vector' in nm)
] ]
import_list.extend(['InputType'])
for nm in import_list:
globals()[nm] = getattr(pydp2, nm)
__all__ = import_list
...@@ -11,6 +11,9 @@ ...@@ -11,6 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""
Dataset package.
"""
import mnist import mnist
import imikolov import imikolov
......
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
# limitations under the License. # limitations under the License.
""" """
CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html
TODO(yuyang18): Complete the comments.
""" """
import cPickle import cPickle
......
...@@ -16,15 +16,17 @@ import tarfile ...@@ -16,15 +16,17 @@ import tarfile
import gzip import gzip
import itertools import itertools
from common import download from common import download
__all__ = ['test, get_dict', 'get_embedding']
""" """
Conll 2005 dataset. Paddle semantic role labeling Book and demo use this Conll 2005 dataset. Paddle semantic role labeling Book and demo use this
dataset as an example. Because Conll 2005 is not free in public, the default dataset as an example. Because Conll 2005 is not free in public, the default
downloaded URL is test set of Conll 2005 (which is public). Users can change downloaded URL is test set of Conll 2005 (which is public). Users can change
URL and MD5 to their Conll dataset. URL and MD5 to their Conll dataset.
TODO(yuyang18): Complete comments.
""" """
__all__ = ['test, get_dict', 'get_embedding']
DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz' DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz'
DATA_MD5 = '387719152ae52d60422c016e92a742fc' DATA_MD5 = '387719152ae52d60422c016e92a742fc'
WORDDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt' WORDDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt'
......
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
# limitations under the License. # limitations under the License.
""" """
IMDB dataset: http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz IMDB dataset: http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz
TODO(yuyang18): Complete comments.
""" """
import paddle.v2.dataset.common import paddle.v2.dataset.common
......
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
# limitations under the License. # limitations under the License.
""" """
imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/ imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/
Complete comments.
""" """
import paddle.v2.dataset.common import paddle.v2.dataset.common
import tarfile import tarfile
......
...@@ -13,6 +13,9 @@ ...@@ -13,6 +13,9 @@
# limitations under the License. # limitations under the License.
""" """
MNIST dataset. MNIST dataset.
This module will download dataset from http://yann.lecun.com/exdb/mnist/ and
parse train set and test set into paddle reader creators.
""" """
import paddle.v2.dataset.common import paddle.v2.dataset.common
import subprocess import subprocess
...@@ -72,6 +75,15 @@ def reader_creator(image_filename, label_filename, buffer_size): ...@@ -72,6 +75,15 @@ def reader_creator(image_filename, label_filename, buffer_size):
def train(): def train():
"""
MNIST train set creator.
It returns a reader creator, each sample in the reader is image pixels in
[0, 1] and label in [0, 9].
:return: Train reader creator
:rtype: callable
"""
return reader_creator( return reader_creator(
paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist',
TRAIN_IMAGE_MD5), TRAIN_IMAGE_MD5),
...@@ -80,6 +92,15 @@ def train(): ...@@ -80,6 +92,15 @@ def train():
def test(): def test():
"""
MNIST test set cretor.
It returns a reader creator, each sample in the reader is image pixels in
[0, 1] and label in [0, 9].
:return: Test reader creator.
:rtype: callable
"""
return reader_creator( return reader_creator(
paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist', paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist',
TEST_IMAGE_MD5), TEST_IMAGE_MD5),
......
...@@ -11,6 +11,11 @@ ...@@ -11,6 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""
Movielens 1-M dataset.
TODO(yuyang18): Complete comments.
"""
import zipfile import zipfile
from common import download from common import download
...@@ -18,7 +23,12 @@ import re ...@@ -18,7 +23,12 @@ import re
import random import random
import functools import functools
__all__ = ['train_creator', 'test_creator'] __all__ = [
'train', 'test', 'get_movie_title_dict', 'max_movie_id', 'max_user_id',
'age_table', 'movie_categories', 'max_job_id', 'user_info', 'movie_info'
]
age_table = [1, 18, 25, 35, 45, 50, 56]
class MovieInfo(object): class MovieInfo(object):
...@@ -33,17 +43,32 @@ class MovieInfo(object): ...@@ -33,17 +43,32 @@ class MovieInfo(object):
[MOVIE_TITLE_DICT[w.lower()] for w in self.title.split()] [MOVIE_TITLE_DICT[w.lower()] for w in self.title.split()]
] ]
def __str__(self):
return "<MovieInfo id(%d), title(%s), categories(%s)>" % (
self.index, self.title, self.categories)
def __repr__(self):
return self.__str__()
class UserInfo(object): class UserInfo(object):
def __init__(self, index, gender, age, job_id): def __init__(self, index, gender, age, job_id):
self.index = int(index) self.index = int(index)
self.is_male = gender == 'M' self.is_male = gender == 'M'
self.age = [1, 18, 25, 35, 45, 50, 56].index(int(age)) self.age = age_table.index(int(age))
self.job_id = int(job_id) self.job_id = int(job_id)
def value(self): def value(self):
return [self.index, 0 if self.is_male else 1, self.age, self.job_id] return [self.index, 0 if self.is_male else 1, self.age, self.job_id]
def __str__(self):
return "<UserInfo id(%d), gender(%s), age(%d), job(%d)>" % (
self.index, "M"
if self.is_male else "F", age_table[self.age], self.job_id)
def __repr__(self):
return str(self)
MOVIE_INFO = None MOVIE_INFO = None
MOVIE_TITLE_DICT = None MOVIE_TITLE_DICT = None
...@@ -54,7 +79,8 @@ USER_INFO = None ...@@ -54,7 +79,8 @@ USER_INFO = None
def __initialize_meta_info__(): def __initialize_meta_info__():
fn = download( fn = download(
url='http://files.grouplens.org/datasets/movielens/ml-1m.zip', url='http://files.grouplens.org/datasets/movielens/ml-1m.zip',
md5='c4d9eecfca2ab87c1945afe126590906') module_name='movielens',
md5sum='c4d9eecfca2ab87c1945afe126590906')
global MOVIE_INFO global MOVIE_INFO
if MOVIE_INFO is None: if MOVIE_INFO is None:
pattern = re.compile(r'^(.*)\((\d+)\)$') pattern = re.compile(r'^(.*)\((\d+)\)$')
...@@ -117,14 +143,63 @@ def __reader_creator__(**kwargs): ...@@ -117,14 +143,63 @@ def __reader_creator__(**kwargs):
return lambda: __reader__(**kwargs) return lambda: __reader__(**kwargs)
train_creator = functools.partial(__reader_creator__, is_test=False) train = functools.partial(__reader_creator__, is_test=False)
test_creator = functools.partial(__reader_creator__, is_test=True) test = functools.partial(__reader_creator__, is_test=True)
def get_movie_title_dict():
__initialize_meta_info__()
return MOVIE_TITLE_DICT
def __max_index_info__(a, b):
if a.index > b.index:
return a
else:
return b
def max_movie_id():
__initialize_meta_info__()
return reduce(__max_index_info__, MOVIE_INFO.viewvalues()).index
def max_user_id():
__initialize_meta_info__()
return reduce(__max_index_info__, USER_INFO.viewvalues()).index
def __max_job_id_impl__(a, b):
if a.job_id > b.job_id:
return a
else:
return b
def max_job_id():
__initialize_meta_info__()
return reduce(__max_job_id_impl__, USER_INFO.viewvalues()).job_id
def movie_categories():
__initialize_meta_info__()
return CATEGORIES_DICT
def user_info():
__initialize_meta_info__()
return USER_INFO
def movie_info():
__initialize_meta_info__()
return MOVIE_INFO
def unittest(): def unittest():
for train_count, _ in enumerate(train_creator()()): for train_count, _ in enumerate(train()()):
pass pass
for test_count, _ in enumerate(test_creator()()): for test_count, _ in enumerate(test()()):
pass pass
print train_count, test_count print train_count, test_count
......
...@@ -15,18 +15,19 @@ ...@@ -15,18 +15,19 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
The script fetch and preprocess movie_reviews data set The script fetch and preprocess movie_reviews data set that provided by NLTK
that provided by NLTK TODO(yuyang18): Complete dataset.
""" """
import common
import collections import collections
import nltk
import numpy as np
from itertools import chain from itertools import chain
import nltk
from nltk.corpus import movie_reviews from nltk.corpus import movie_reviews
import common
__all__ = ['train', 'test', 'get_word_dict'] __all__ = ['train', 'test', 'get_word_dict']
NUM_TRAINING_INSTANCES = 1600 NUM_TRAINING_INSTANCES = 1600
NUM_TOTAL_INSTANCES = 2000 NUM_TOTAL_INSTANCES = 2000
......
...@@ -11,6 +11,11 @@ ...@@ -11,6 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""
UCI Housing dataset.
TODO(yuyang18): Complete comments.
"""
import numpy as np import numpy as np
import os import os
......
...@@ -14,129 +14,92 @@ ...@@ -14,129 +14,92 @@
""" """
wmt14 dataset wmt14 dataset
""" """
import paddle.v2.dataset.common
import tarfile import tarfile
import os.path
import itertools import paddle.v2.dataset.common
__all__ = ['train', 'test', 'build_dict'] __all__ = ['train', 'test', 'build_dict']
URL_DEV_TEST = 'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz' URL_DEV_TEST = 'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz'
MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5' MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5'
URL_TRAIN = 'http://localhost:8000/train.tgz' # this is a small set of data for test. The original data is too large and will be add later.
MD5_TRAIN = '72de99da2830ea5a3a2c4eb36092bbc7' URL_TRAIN = 'http://paddlepaddle.bj.bcebos.com/demo/wmt_shrinked_data/wmt14.tgz'
MD5_TRAIN = 'a755315dd01c2c35bde29a744ede23a6'
def word_count(f, word_freq=None): START = "<s>"
add = paddle.v2.dataset.common.dict_add END = "<e>"
if word_freq == None: UNK = "<unk>"
word_freq = {} UNK_IDX = 2
for l in f:
for w in l.strip().split(): def __read_to_dict__(tar_file, dict_size):
add(word_freq, w) def __to_dict__(fd, size):
add(word_freq, '<s>') out_dict = dict()
add(word_freq, '<e>') for line_count, line in enumerate(fd):
if line_count < size:
return word_freq out_dict[line.strip()] = line_count
else:
break
def get_word_dix(word_freq): return out_dict
TYPO_FREQ = 50
word_freq = filter(lambda x: x[1] > TYPO_FREQ, word_freq.items()) with tarfile.open(tar_file, mode='r') as f:
word_freq_sorted = sorted(word_freq, key=lambda x: (-x[1], x[0])) names = [
words, _ = list(zip(*word_freq_sorted)) each_item.name for each_item in f
word_idx = dict(zip(words, xrange(len(words)))) if each_item.name.endswith("src.dict")
word_idx['<unk>'] = len(words) ]
return word_idx assert len(names) == 1
src_dict = __to_dict__(f.extractfile(names[0]), dict_size)
names = [
def get_word_freq(train, dev): each_item.name for each_item in f
word_freq = word_count(train, word_count(dev)) if each_item.name.endswith("trg.dict")
if '<unk>' in word_freq: ]
# remove <unk> for now, since we will set it as last index assert len(names) == 1
del word_freq['<unk>'] trg_dict = __to_dict__(f.extractfile(names[0]), dict_size)
return word_freq return src_dict, trg_dict
def build_dict():
base_dir = './wmt14-data'
train_en_filename = base_dir + '/train/train.en'
train_fr_filename = base_dir + '/train/train.fr'
dev_en_filename = base_dir + '/dev/ntst1213.en'
dev_fr_filename = base_dir + '/dev/ntst1213.fr'
if not os.path.exists(train_en_filename) or not os.path.exists(
train_fr_filename):
with tarfile.open(
paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14',
MD5_TRAIN)) as tf:
tf.extractall(base_dir)
if not os.path.exists(dev_en_filename) or not os.path.exists(
dev_fr_filename):
with tarfile.open(
paddle.v2.dataset.common.download(URL_DEV_TEST, 'wmt14',
MD5_DEV_TEST)) as tf:
tf.extractall(base_dir)
f_en = open(train_en_filename)
f_fr = open(train_fr_filename)
f_en_dev = open(dev_en_filename)
f_fr_dev = open(dev_fr_filename)
word_freq_en = get_word_freq(f_en, f_en_dev)
word_freq_fr = get_word_freq(f_fr, f_fr_dev)
f_en.close()
f_fr.close()
f_en_dev.close()
f_fr_dev.close()
return get_word_dix(word_freq_en), get_word_dix(word_freq_fr)
def reader_creator(directory, path_en, path_fr, URL, MD5, dict_en, dict_fr): def reader_creator(tar_file, file_name, dict_size):
def reader(): def reader():
if not os.path.exists(path_en) or not os.path.exists(path_fr): src_dict, trg_dict = __read_to_dict__(tar_file, dict_size)
with tarfile.open( with tarfile.open(tar_file, mode='r') as f:
paddle.v2.dataset.common.download(URL, 'wmt14', MD5)) as tf: names = [
tf.extractall(directory) each_item.name for each_item in f
if each_item.name.endswith(file_name)
f_en = open(path_en) ]
f_fr = open(path_fr) for name in names:
UNK_en = dict_en['<unk>'] for line in f.extractfile(name):
UNK_fr = dict_fr['<unk>'] line_split = line.strip().split('\t')
if len(line_split) != 2:
for en, fr in itertools.izip(f_en, f_fr): continue
src_ids = [dict_en.get(w, UNK_en) for w in en.strip().split()] src_seq = line_split[0] # one source sequence
tar_ids = [ src_words = src_seq.split()
dict_fr.get(w, UNK_fr) src_ids = [
for w in ['<s>'] + fr.strip().split() + ['<e>'] src_dict.get(w, UNK_IDX)
for w in [START] + src_words + [END]
] ]
trg_seq = line_split[1] # one target sequence
trg_words = trg_seq.split()
trg_ids = [trg_dict.get(w, UNK_IDX) for w in trg_words]
# remove sequence whose length > 80 in training mode # remove sequence whose length > 80 in training mode
if len(src_ids) == 0 or len(tar_ids) <= 1 or len( if len(src_ids) > 80 or len(trg_ids) > 80:
src_ids) > 80 or len(tar_ids) > 80:
continue continue
trg_ids_next = trg_ids + [trg_dict[END]]
trg_ids = [trg_dict[START]] + trg_ids
yield src_ids, tar_ids[:-1], tar_ids[1:] yield src_ids, trg_ids, trg_ids_next
f_en.close()
f_fr.close()
return reader return reader
def train(dict_en, dict_fr): def train(dict_size):
directory = './wmt14-data' return reader_creator(
return reader_creator(directory, directory + '/train/train.en', paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN),
directory + '/train/train.fr', URL_TRAIN, MD5_TRAIN, 'train/train', dict_size)
dict_en, dict_fr)
def test(dict_en, dict_fr): def test(dict_size):
directory = './wmt14-data' return reader_creator(
return reader_creator(directory, directory + '/dev/ntst1213.en', paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN),
directory + '/dev/ntst1213.fr', URL_DEV_TEST, 'test/test', dict_size)
MD5_DEV_TEST, dict_en, dict_fr)
...@@ -34,6 +34,10 @@ class WithMetric(object): ...@@ -34,6 +34,10 @@ class WithMetric(object):
class TestResult(WithMetric): class TestResult(WithMetric):
"""
Result that trainer.test return.
"""
def __init__(self, evaluator, cost): def __init__(self, evaluator, cost):
super(TestResult, self).__init__(evaluator) super(TestResult, self).__init__(evaluator)
self.cost = cost self.cost = cost
......
import numpy
import py_paddle.swig_paddle as api import py_paddle.swig_paddle as api
import collections
import topology import topology
import minibatch
from data_feeder import DataFeeder from data_feeder import DataFeeder
import itertools
import numpy
__all__ = ['infer'] __all__ = ['infer']
...@@ -21,10 +21,33 @@ class Inference(object): ...@@ -21,10 +21,33 @@ class Inference(object):
self.__gradient_machine__ = gm self.__gradient_machine__ = gm
self.__data_types__ = topo.data_type() self.__data_types__ = topo.data_type()
def iter_infer(self, reader, reader_dict=None): def iter_infer(self, input=None, batch_size=None, reader=None,
if reader_dict is None: feeding=None):
reader_dict = self.default_reader_dict() feeder = DataFeeder(self.__data_types__, feeding)
feeder = DataFeeder(self.__data_types__, reader_dict) if reader is None:
assert input is not None and isinstance(input, collections.Iterable)
if not isinstance(input, collections.Iterable):
raise TypeError("When reader is None, input should be whole "
"inference data and should be iterable")
if batch_size is None:
if not hasattr(input, '__len__'):
raise ValueError("Should set batch size when input data "
"don't contain length.")
batch_size = len(input)
def __reader_impl__():
for each_sample in input:
if len(feeder) == 1:
yield [each_sample]
else:
yield each_sample
reader = minibatch.batch(__reader_impl__, batch_size=batch_size)
else:
if input is not None:
raise ValueError("User should set either input or reader, "
"should not set them both.")
self.__gradient_machine__.start() self.__gradient_machine__.start()
for data_batch in reader(): for data_batch in reader():
yield self.__gradient_machine__.forwardTest(feeder(data_batch)) yield self.__gradient_machine__.forwardTest(feeder(data_batch))
...@@ -47,13 +70,53 @@ class Inference(object): ...@@ -47,13 +70,53 @@ class Inference(object):
else: else:
return retv return retv
def default_reader_dict(self):
reader_dict = dict()
for i, tp in enumerate(self.__data_types__):
reader_dict[tp[0]] = i
return reader_dict
def infer(output,
parameters,
input=None,
batch_size=None,
reader=None,
feeding=None,
field='value'):
"""
Infer a neural network by given neural network output and parameters. The
user should pass either a batch of input data or reader method.
Example usages:
.. code-block:: python
result = paddle.infer(prediction, parameters, input=SomeData,
batch_size=32)
print result
:param output: output of the neural network that would be inferred
:type output: paddle.v2.config_base.Layer
:param parameters: parameters of the neural network.
:type parameters: paddle.v2.parameters.Parameters
:param input: input data batch. Should be a python iterable object, and each
element is the data batch.
:type input: collections.Iterable
:param batch_size: the batch size when perform inference. Default is the
length of input.
:type batch_size: int
:param reader: input data reader creator in batch. If this field is set, the
`input` and `batch_size` will be ignored.
:type reader: callable
:param feeding: Reader dictionary. Default could generate from input
value.
:param field: The prediction field. It should in [`value`, `ids`]. `value`
means return the prediction probabilities, `ids` means return
the prediction labels. Default is `value`
:type field: str
:return: a numpy array
:rtype: numpy.ndarray
"""
def infer(output, parameters, reader, reader_dict=None, field='value'):
inferer = Inference(output=output, parameters=parameters) inferer = Inference(output=output, parameters=parameters)
return inferer.infer(field=field, reader=reader, reader_dict=reader_dict) return inferer.infer(
field=field,
input=input,
batch_size=batch_size,
reader=reader,
feeding=feeding)
...@@ -28,7 +28,7 @@ The primary usage shows below. ...@@ -28,7 +28,7 @@ The primary usage shows below.
act=paddle.activation.Softmax()) act=paddle.activation.Softmax())
# use prediction instance where needed. # use prediction instance where needed.
parameters = paddle.v2.parameters.create(cost) parameters = paddle.parameters.create(cost)
""" """
import collections import collections
...@@ -47,26 +47,32 @@ from paddle.trainer.config_parser import \ ...@@ -47,26 +47,32 @@ from paddle.trainer.config_parser import \
RecurrentLayerGroupEnd, model_type RecurrentLayerGroupEnd, model_type
import activation import activation
import re
import data_type import data_type
__all__ = ['parse_network', 'data'] __all__ = ['parse_network', 'data']
__projection_names__ = filter(lambda x: x.endswith('_projection'),
dir(conf_helps))
__all__ += __projection_names__
__operator_names__ = filter(lambda x: x.endswith('_operator'), dir(conf_helps))
__all__ += __operator_names__
def parse_network(*outputs): def parse_network(*outputs):
""" """
parse all output layers and then generate a model config proto. Parse all output layers and then generate a ModelConfig object.
:param outputs:
:return: .. note::
This function is used internally in paddle.v2 module. User should never
invoke this method.
:param outputs: Output layers.
:type outputs: Layer
:return: A ModelConfig object instance.
:rtype: ModelConfig
""" """
def __real_func__(): def __real_func__():
"""
__real_func__ is the function that config_parser.parse invoked. It is
the plain old paddle configuration function.
"""
context = dict() context = dict()
real_output = [each.to_proto(context=context) for each in outputs] real_output = [each.to_proto(context=context) for each in outputs]
conf_helps.outputs(real_output) conf_helps.outputs(real_output)
...@@ -81,6 +87,8 @@ So we also need to implement some special LayerV2. ...@@ -81,6 +87,8 @@ So we also need to implement some special LayerV2.
class DataLayerV2(Layer): class DataLayerV2(Layer):
METHOD_NAME = 'data_layer'
def __init__(self, name, type, **kwargs): def __init__(self, name, type, **kwargs):
assert isinstance(type, data_type.InputType) assert isinstance(type, data_type.InputType)
...@@ -99,6 +107,17 @@ class DataLayerV2(Layer): ...@@ -99,6 +107,17 @@ class DataLayerV2(Layer):
args[each] = self.__kwargs__[each] args[each] = self.__kwargs__[each]
return getattr(conf_helps, self.__method_name__)(name=self.name, **args) return getattr(conf_helps, self.__method_name__)(name=self.name, **args)
def __map_docstr__(doc):
doc = re.sub(r'(data = [^\)]+)\).*',
"data = paddle.layer.data(name=\"input\", "
"type=paddle.data_type.dense_vector(1000))", doc)
doc = re.sub(r':param size:.*',
':param type: Data type of this data layer', doc)
doc = re.sub(r':type size:.*',
":type size: paddle.v2.data_type.InputType", doc)
return doc
class WithExtraParent(Layer): class WithExtraParent(Layer):
def extra_parent(self): def extra_parent(self):
...@@ -347,6 +366,7 @@ class RecurrentLayerOutput(Layer): ...@@ -347,6 +366,7 @@ class RecurrentLayerOutput(Layer):
LayerV2 = Layer LayerV2 = Layer
data = DataLayerV2 data = DataLayerV2
data.__name__ = 'data'
AggregateLevel = conf_helps.layers.AggregateLevel AggregateLevel = conf_helps.layers.AggregateLevel
ExpandLevel = conf_helps.layers.ExpandLevel ExpandLevel = conf_helps.layers.ExpandLevel
memory = MemoryV2 memory = MemoryV2
...@@ -386,6 +406,7 @@ def __convert_layer__(_new_name_, _old_name_, _parent_names_): ...@@ -386,6 +406,7 @@ def __convert_layer__(_new_name_, _old_name_, _parent_names_):
global __all__ global __all__
__all__.append(_new_name_) __all__.append(_new_name_)
globals()[new_name] = __convert_to_v2__(_old_name_, _parent_names_) globals()[new_name] = __convert_to_v2__(_old_name_, _parent_names_)
globals()[new_name].__name__ = new_name
for each_layer_name in dir(conf_helps): for each_layer_name in dir(conf_helps):
...@@ -399,21 +420,6 @@ del parent_names ...@@ -399,21 +420,6 @@ del parent_names
del new_name del new_name
del each_layer_name del each_layer_name
# convert projection
for prj in __projection_names__:
globals()[prj] = __convert_to_v2__(
prj, parent_names=['input'], is_default_name=False)
# convert operator
operator_list = [
# [V1_method_name, parent_names],
['dotmul_operator', ['a', 'b']],
['conv_operator', ['img', 'filter']]
]
for op in operator_list:
globals()[op[0]] = __convert_to_v2__(
op[0], parent_names=op[1], is_default_name=False)
@wrap_name_default() @wrap_name_default()
def recurrent_group(step, input, name=None): def recurrent_group(step, input, name=None):
...@@ -464,3 +470,29 @@ def recurrent_group(step, input, name=None): ...@@ -464,3 +470,29 @@ def recurrent_group(step, input, name=None):
return retv[0] return retv[0]
else: else:
return retv return retv
__projection_names__ = filter(lambda x: x.endswith('_projection'),
dir(conf_helps))
__all__ += __projection_names__
__operator_names__ = filter(lambda x: x.endswith('_operator'), dir(conf_helps))
__all__ += __operator_names__
# convert projection
for prj in __projection_names__:
globals()[prj] = __convert_to_v2__(
prj, parent_names=['input'], is_default_name=False)
globals()[prj].__name__ = prj
# convert operator
operator_list = [
# [V1_method_name, parent_names],
['dotmul_operator', ['a', 'b']],
['conv_operator', ['img', 'filter']]
]
for op in operator_list:
globals()[op[0]] = __convert_to_v2__(
op[0], parent_names=op[1], is_default_name=False)
globals()[op[0]].__name__ = op[0]
...@@ -12,24 +12,30 @@ ...@@ -12,24 +12,30 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
__all__ = ['batch']
def batch(reader, batch_size): def batch(reader, batch_size):
""" """
Create a batch reader. Create a batched reader.
:param reader: the data reader to read from. :param reader: the data reader to read from.
:param batch_size: batch_size :type reader: callable
:return: the batch reader. :param batch_size: size of each mini-batch
:type batch_size: int
:return: the batched reader.
:rtype: callable
""" """
def batch_reader(): def batch_reader():
r = reader() r = reader()
batch = [] b = []
for instance in r: for instance in r:
batch.append(instance) b.append(instance)
if len(batch) == batch_size: if len(b) == batch_size:
yield batch yield b
batch = [] b = []
if batch: if b:
yield batch yield b
return batch_reader return batch_reader
...@@ -38,6 +38,7 @@ def __initialize__(): ...@@ -38,6 +38,7 @@ def __initialize__():
parent_names=parents, parent_names=parents,
is_default_name='name' in argspec.args) is_default_name='name' in argspec.args)
globals()[each_subnetwork] = v2_subnet globals()[each_subnetwork] = v2_subnet
globals()[each_subnetwork].__name__ = each_subnetwork
global __all__ global __all__
__all__.append(each_subnetwork) __all__.append(each_subnetwork)
......
import py_paddle.swig_paddle as swig_api import py_paddle.swig_paddle as swig_api
import paddle.trainer_config_helpers.optimizers as v1_optimizers
import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils
import paddle.v2 import paddle.trainer_config_helpers.optimizers as v1_optimizers
"""
Optimizers(update equation) for SGD method.
TODO(yuyang18): Complete comments.
"""
__all__ = [ __all__ = [
'Momentum', 'Adam', 'Adamax', 'AdaGrad', 'DecayedAdaGrad', 'AdaDelta', 'Momentum', 'Adam', 'Adamax', 'AdaGrad', 'DecayedAdaGrad', 'AdaDelta',
...@@ -44,7 +49,7 @@ class Optimizer(object): ...@@ -44,7 +49,7 @@ class Optimizer(object):
class Momentum(Optimizer): class Momentum(Optimizer):
def __init__(self, momentum=None, sparse=False, **kwargs): def __init__(self, momentum=None, sparse=False, **kwargs):
learning_method = v1_optimizers.MomentumOptimizer( learning_method = v1_optimizers.MomentumOptimizer(
momentum=None, sparse=False) momentum=momentum, sparse=sparse)
super(Momentum, self).__init__( super(Momentum, self).__init__(
learning_method=learning_method, **kwargs) learning_method=learning_method, **kwargs)
......
import numpy as np import numpy as np
import py_paddle.swig_paddle as api import py_paddle.swig_paddle as api
from paddle.proto.ParameterConfig_pb2 import ParameterConfig from paddle.proto.ParameterConfig_pb2 import ParameterConfig
import struct
import tarfile
import cStringIO
from topology import Topology from topology import Topology
__all__ = ['Parameters', 'create'] __all__ = ['Parameters', 'create']
...@@ -10,6 +12,7 @@ __all__ = ['Parameters', 'create'] ...@@ -10,6 +12,7 @@ __all__ = ['Parameters', 'create']
def create(layers): def create(layers):
""" """
Create parameter pool by topology. Create parameter pool by topology.
:param layers: :param layers:
:return: :return:
""" """
...@@ -67,6 +70,7 @@ class Parameters(object): ...@@ -67,6 +70,7 @@ class Parameters(object):
def keys(self): def keys(self):
""" """
keys are the names of each parameter. keys are the names of each parameter.
:return: list of parameter name :return: list of parameter name
:rtype: list :rtype: list
""" """
...@@ -75,6 +79,7 @@ class Parameters(object): ...@@ -75,6 +79,7 @@ class Parameters(object):
def names(self): def names(self):
""" """
names of each parameter. names of each parameter.
:return: list of parameter name :return: list of parameter name
:rtype: list :rtype: list
""" """
...@@ -83,6 +88,7 @@ class Parameters(object): ...@@ -83,6 +88,7 @@ class Parameters(object):
def has_key(self, key): def has_key(self, key):
""" """
has_key return true if there are such parameter name == key has_key return true if there are such parameter name == key
:param key: Parameter name :param key: Parameter name
:type key: basestring :type key: basestring
:return: True if contains such key :return: True if contains such key
...@@ -118,6 +124,12 @@ class Parameters(object): ...@@ -118,6 +124,12 @@ class Parameters(object):
if len(self.__gradient_machines__) == 0: if len(self.__gradient_machines__) == 0:
# create new parameter in python numpy. # create new parameter in python numpy.
if len(self.__tmp_params__) != 0:
ret_list = [
mat for name, mat in self.__tmp_params__ if name == key
]
if len(ret_list) == 1:
return ret_list[0]
return np.ndarray(shape=shape, dtype=np.float32) return np.ndarray(shape=shape, dtype=np.float32)
else: else:
for each_gradient_machine in self.__gradient_machines__: for each_gradient_machine in self.__gradient_machines__:
...@@ -136,6 +148,7 @@ class Parameters(object): ...@@ -136,6 +148,7 @@ class Parameters(object):
def get_shape(self, key): def get_shape(self, key):
""" """
get shape of the parameter. get shape of the parameter.
:param key: parameter name :param key: parameter name
:type key: basestring :type key: basestring
:return: parameter's shape :return: parameter's shape
...@@ -190,6 +203,7 @@ class Parameters(object): ...@@ -190,6 +203,7 @@ class Parameters(object):
def set(self, parameter_name, value): def set(self, parameter_name, value):
""" """
Set parameter by parameter name & matrix. Set parameter by parameter name & matrix.
:param parameter_name: parameter name :param parameter_name: parameter name
:type parameter_name: basestring :type parameter_name: basestring
:param value: parameter matrix :param value: parameter matrix
...@@ -222,6 +236,67 @@ class Parameters(object): ...@@ -222,6 +236,67 @@ class Parameters(object):
self.__gradient_machines__.append(gradient_machine) self.__gradient_machines__.append(gradient_machine)
def serialize(self, name, f):
"""
:param name:
:param f:
:type f: file
:return:
"""
param = self.get(name)
size = reduce(lambda a, b: a * b, param.shape)
f.write(struct.pack("IIQ", 0, 4, size))
param = param.astype(np.float32)
f.write(param.tobytes())
def deserialize(self, name, f):
"""
:param name:
:param f:
:type f: file
:return:
"""
f.read(16) # header
arr = np.frombuffer(f.read(), dtype=np.float32)
self.set(name, arr.reshape(self.get_shape(name)))
def to_tar(self, f):
tar = tarfile.TarFile(fileobj=f, mode='w')
for nm in self.names():
buf = cStringIO.StringIO()
self.serialize(nm, buf)
tarinfo = tarfile.TarInfo(name=nm)
buf.seek(0)
tarinfo.size = len(buf.getvalue())
tar.addfile(tarinfo, buf)
conf = self.__param_conf__[nm]
confStr = conf.SerializeToString()
tarinfo = tarfile.TarInfo(name="%s.protobuf" % nm)
tarinfo.size = len(confStr)
buf = cStringIO.StringIO(confStr)
buf.seek(0)
tar.addfile(tarinfo, fileobj=buf)
@staticmethod
def from_tar(f):
params = Parameters()
tar = tarfile.TarFile(fileobj=f, mode='r')
for finfo in tar:
assert isinstance(finfo, tarfile.TarInfo)
if finfo.name.endswith('.protobuf'):
f = tar.extractfile(finfo)
conf = ParameterConfig()
conf.ParseFromString(f.read())
params.__append_config__(conf)
for param_name in params.names():
f = tar.extractfile(param_name)
params.deserialize(param_name, f)
return params
def __get_parameter_in_gradient_machine__(gradient_machine, name): def __get_parameter_in_gradient_machine__(gradient_machine, name):
""" """
......
...@@ -12,13 +12,15 @@ ...@@ -12,13 +12,15 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from paddle.trainer_config_helpers.poolings import * import paddle.trainer_config_helpers.poolings
import copy
__all__ = ["Max", "CudnnMax", "Avg", "CudnnAvg", "Sum", "SquareRootN"] __all__ = []
suffix = 'Pooling'
Max = MaxPooling for name in paddle.trainer_config_helpers.poolings.__all__:
CudnnMax = CudnnMaxPooling new_name = name[:-len(suffix)]
Avg = AvgPooling globals()[new_name] = copy.copy(
CudnnAvg = CudnnAvgPooling getattr(paddle.trainer_config_helpers.poolings, name))
Sum = SumPooling globals()[new_name].__name__ = new_name
SquareRootN = SquareRootNPooling __all__.append(new_name)
...@@ -11,15 +11,64 @@ ...@@ -11,15 +11,64 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""
At training and testing time, PaddlePaddle programs need to read data. To ease
the users' work to write data reading code, we define that
# It would be too lengthy to require our users to prefix decorators with `decorator`. - A *reader* is a function that reads data (from file, network, random number
# For example, we want the following line generator, etc) and yields data items.
# - A *reader creator* is a function that returns a reader function.
# r = paddle.reader.decorator.bufferd(paddle.reader.creator.text("hello.txt")) - A *reader decorator* is a function, which accepts one or more readers, and
# returns a reader.
# to be a shorter version: - A *batch reader* is a function that reads data (from *reader*, file, network,
# random number generator, etc) and yields a batch of data items.
# r = paddle.reader.buffered(paddle.reader.creator.text("hello.txt"))
#####################
Data Reader Interface
#####################
Indeed, *data reader* doesn't have to be a function that reads and yields data
items. It can be any function with no parameter that creates a iterable
(anything can be used in :code:`for x in iterable`)\:
.. code-block:: python
iterable = data_reader()
Element produced from the iterable should be a **single** entry of data,
**not** a mini batch. That entry of data could be a single item, or a tuple of
items.
Item should be of `supported type <http://www.paddlepaddle.org/doc/ui/data_provider
/pydataprovider2.html?highlight=dense_vector#input-types>`_ (e.g., numpy 1d
array of float32, int, list of int)
An example implementation for single item data reader creator:
.. code-block:: python
def reader_creator_random_image(width, height):
def reader():
while True:
yield numpy.random.uniform(-1, 1, size=width*height)
return reader
An example implementation for multiple item data reader creator:
.. code-block:: python
def reader_creator_random_image_and_label(width, height, label):
def reader():
while True:
yield numpy.random.uniform(-1, 1, size=width*height), label
return reader
TODO(yuyang18): Should we add whole design doc here?
"""
import decorator
from decorator import * from decorator import *
import creator import creator
__all__ = decorator.__all__ + ['creator']
...@@ -11,6 +11,10 @@ ...@@ -11,6 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""
Creator package contains some simple reader creator, which could be used in user
program.
"""
__all__ = ['np_array', 'text_file'] __all__ = ['np_array', 'text_file']
...@@ -38,7 +42,7 @@ def np_array(x): ...@@ -38,7 +42,7 @@ def np_array(x):
def text_file(path): def text_file(path):
""" """
Creates a data reader that outputs text line by line from given text file. Creates a data reader that outputs text line by line from given text file.
Trailing new line ('\n') of each line will be removed. Trailing new line ('\\\\n') of each line will be removed.
:path: path of the text file. :path: path of the text file.
:returns: data reader of text file :returns: data reader of text file
......
...@@ -28,9 +28,11 @@ def map_readers(func, *readers): ...@@ -28,9 +28,11 @@ def map_readers(func, *readers):
Creates a data reader that outputs return value of function using Creates a data reader that outputs return value of function using
output of each data readers as arguments. output of each data readers as arguments.
:param func: function to use. :param func: function to use. The type of func should be (Sample) => Sample
:param *readers: readers whose outputs will be used as arguments of func. :type: callable
:returns: the created data reader. :param readers: readers whose outputs will be used as arguments of func.
:return: the created data reader.
:rtype: callable
""" """
def reader(): def reader():
...@@ -45,16 +47,19 @@ def map_readers(func, *readers): ...@@ -45,16 +47,19 @@ def map_readers(func, *readers):
def shuffle(reader, buf_size): def shuffle(reader, buf_size):
""" """
Creates a data reader whose data output is suffled. Creates a data reader whose data output is shuffled.
Output from the iterator that created by original reader will be Output from the iterator that created by original reader will be
buffered into shuffle buffer, and then shuffled. The size of shuffle buffer buffered into shuffle buffer, and then shuffled. The size of shuffle buffer
is determined by argument buf_size. is determined by argument buf_size.
:param reader: the original reader whose output will be shuffled. :param reader: the original reader whose output will be shuffled.
:type reader: callable
:param buf_size: shuffle buffer size. :param buf_size: shuffle buffer size.
:type buf_size: int
:returns:the new reader whose output is shuffled. :return: the new reader whose output is shuffled.
:rtype: callable
""" """
def data_reader(): def data_reader():
...@@ -88,7 +93,8 @@ def chain(*readers): ...@@ -88,7 +93,8 @@ def chain(*readers):
[0, 0, 0, 1, 1, 1, 2, 2, 2] [0, 0, 0, 1, 1, 1, 2, 2, 2]
:param readers: input readers. :param readers: input readers.
:returns: the new data reader. :return: the new data reader.
:rtype: callable
""" """
def reader(): def reader():
...@@ -115,12 +121,13 @@ def compose(*readers, **kwargs): ...@@ -115,12 +121,13 @@ def compose(*readers, **kwargs):
The composed reader will output: The composed reader will output:
(1, 2, 3, 4, 5) (1, 2, 3, 4, 5)
:*readers: readers that will be composed together. :param readers: readers that will be composed together.
:check_alignment: if True, will check if input readers are aligned :param check_alignment: if True, will check if input readers are aligned
correctly. If False, will not check alignment and trailing outputs correctly. If False, will not check alignment and trailing outputs
will be discarded. Defaults to True. will be discarded. Defaults to True.
:type check_alignment: bool
:returns: the new data reader. :return: the new data reader.
:raises ComposeNotAligned: outputs of readers are not aligned. :raises ComposeNotAligned: outputs of readers are not aligned.
Will not raise when check_alignment is set to False. Will not raise when check_alignment is set to False.
...@@ -161,7 +168,9 @@ def buffered(reader, size): ...@@ -161,7 +168,9 @@ def buffered(reader, size):
as the buffer is not empty. as the buffer is not empty.
:param reader: the data reader to read from. :param reader: the data reader to read from.
:type reader: callable
:param size: max buffer size. :param size: max buffer size.
:type size: int
:returns: the buffered data reader. :returns: the buffered data reader.
""" """
...@@ -196,6 +205,13 @@ def buffered(reader, size): ...@@ -196,6 +205,13 @@ def buffered(reader, size):
def firstn(reader, n): def firstn(reader, n):
""" """
Limit the max number of samples that reader could return. Limit the max number of samples that reader could return.
:param reader: the data reader to read from.
:type reader: callable
:param n: the max number of samples that return.
:type n: int
:return: the decorated reader.
:rtype: callable
""" """
# TODO(yuyang18): Check if just drop the reader, could clean the opened # TODO(yuyang18): Check if just drop the reader, could clean the opened
......
...@@ -22,7 +22,7 @@ cd $SCRIPTPATH ...@@ -22,7 +22,7 @@ cd $SCRIPTPATH
$1 -m pip install ../../../../paddle/dist/*.whl $1 -m pip install ../../../../paddle/dist/*.whl
test_list="test_data_feeder.py" test_list="test_data_feeder.py test_parameters.py"
export PYTHONPATH=$PWD/../../../../python/ export PYTHONPATH=$PWD/../../../../python/
......
import unittest
import sys
try:
import py_paddle
del py_paddle
except ImportError:
print >> sys.stderr, "It seems swig of Paddle is not installed, this " \
"unittest will not be run."
sys.exit(0)
import paddle.v2.parameters as parameters
from paddle.proto.ParameterConfig_pb2 import ParameterConfig
import random
import cStringIO
import numpy
def __rand_param_config__(name):
conf = ParameterConfig()
conf.name = name
size = 1
for i in xrange(2):
dim = random.randint(1, 1000)
conf.dims.append(dim)
size *= dim
conf.size = size
assert conf.IsInitialized()
return conf
class TestParameters(unittest.TestCase):
def test_serialization(self):
params = parameters.Parameters()
params.__append_config__(__rand_param_config__("param_0"))
params.__append_config__(__rand_param_config__("param_1"))
for name in params.names():
param = params.get(name)
param[:] = numpy.random.uniform(
-1.0, 1.0, size=params.get_shape(name))
params.set(name, param)
tmp_file = cStringIO.StringIO()
params.to_tar(tmp_file)
tmp_file.seek(0)
params_dup = parameters.Parameters.from_tar(tmp_file)
self.assertEqual(params_dup.names(), params.names())
for name in params.names():
self.assertEqual(params.get_shape(name), params_dup.get_shape(name))
p0 = params.get(name)
p1 = params_dup.get(name)
self.assertTrue(numpy.isclose(p0, p1).all())
if __name__ == '__main__':
unittest.main()
...@@ -16,6 +16,7 @@ import paddle.v2.layer as layer ...@@ -16,6 +16,7 @@ import paddle.v2.layer as layer
import paddle.v2.topology as topology import paddle.v2.topology as topology
import paddle.v2.data_type as data_type import paddle.v2.data_type as data_type
import paddle.trainer_config_helpers as conf_helps import paddle.trainer_config_helpers as conf_helps
import paddle.trainer.PyDataProvider2 as pydp2
class TestTopology(unittest.TestCase): class TestTopology(unittest.TestCase):
...@@ -35,13 +36,13 @@ class TestTopology(unittest.TestCase): ...@@ -35,13 +36,13 @@ class TestTopology(unittest.TestCase):
pixel_data_type = filter(lambda type: type[0] == "pixel", data_types) pixel_data_type = filter(lambda type: type[0] == "pixel", data_types)
self.assertEqual(len(pixel_data_type), 1) self.assertEqual(len(pixel_data_type), 1)
pixel_data_type = pixel_data_type[0] pixel_data_type = pixel_data_type[0]
self.assertEqual(pixel_data_type[1].type, data_type.DataType.Dense) self.assertEqual(pixel_data_type[1].type, pydp2.DataType.Dense)
self.assertEqual(pixel_data_type[1].dim, 784) self.assertEqual(pixel_data_type[1].dim, 784)
label_data_type = filter(lambda type: type[0] == "label", data_types) label_data_type = filter(lambda type: type[0] == "label", data_types)
self.assertEqual(len(label_data_type), 1) self.assertEqual(len(label_data_type), 1)
label_data_type = label_data_type[0] label_data_type = label_data_type[0]
self.assertEqual(label_data_type[1].type, data_type.DataType.Index) self.assertEqual(label_data_type[1].type, pydp2.DataType.Index)
self.assertEqual(label_data_type[1].dim, 10) self.assertEqual(label_data_type[1].dim, 10)
def test_get_layer(self): def test_get_layer(self):
......
...@@ -9,6 +9,10 @@ from . import optimizer as v2_optimizer ...@@ -9,6 +9,10 @@ from . import optimizer as v2_optimizer
from . import parameters as v2_parameters from . import parameters as v2_parameters
__all__ = ['SGD'] __all__ = ['SGD']
"""
Trainer package
TODO(yuyang18): Complete comments.
"""
def default_event_handler(event): def default_event_handler(event):
...@@ -22,15 +26,21 @@ def default_event_handler(event): ...@@ -22,15 +26,21 @@ def default_event_handler(event):
pass pass
class SGD(): class SGD(object):
def __init__(self, cost, parameters, update_equation):
""" """
Simple SGD Trainer. Simple SGD Trainer.
TODO(yuyang18): Complete comments
:param update_equation: The optimizer object. :param update_equation: The optimizer object.
:type update_equation: v2_optimizer.Optimizer :type update_equation: paddle.v2.optimizer.Optimizer
:param cost: Target cost that neural network should be optimized.
:type cost: paddle.v2.config_base.Layer
:param parameters: The parameters dictionary.
:type parameters: paddle.v2.parameters.Parameters
""" """
def __init__(self, cost, parameters, update_equation):
if not isinstance(parameters, v2_parameters.Parameters): if not isinstance(parameters, v2_parameters.Parameters):
raise TypeError('parameters should be parameters') raise TypeError('parameters should be parameters')
...@@ -47,29 +57,26 @@ class SGD(): ...@@ -47,29 +57,26 @@ class SGD():
self.__topology_in_proto__, api.CREATE_MODE_NORMAL, self.__topology_in_proto__, api.CREATE_MODE_NORMAL,
self.__optimizer__.enable_types()) self.__optimizer__.enable_types())
assert isinstance(gm, api.GradientMachine) assert isinstance(gm, api.GradientMachine)
parameters.append_gradient_machine(gm)
self.__gradient_machine__ = gm self.__gradient_machine__ = gm
self.__gradient_machine__.randParameters() self.__gradient_machine__.randParameters()
parameters.append_gradient_machine(gm)
def train(self, reader, num_passes=1, event_handler=None, reader_dict=None): def train(self, reader, num_passes=1, event_handler=None, feeding=None):
""" """
Training method. Will train num_passes of input data. Training method. Will train num_passes of input data.
:param reader: :param reader:
:param topology: Network Topology, use one or more Layers to represent it.
:param parameters: The parameter pools.
:param num_passes: The total train passes. :param num_passes: The total train passes.
:param event_handler: Event handler. A method will be invoked when event :param event_handler: Event handler. A method will be invoked when event
occurred. occurred.
:type event_handler: (BaseEvent) => None :type event_handler: (BaseEvent) => None
:param feeding: Feeding is a map of neural network input name and array
index that reader returns.
:type feeding: dict
:return: :return:
""" """
if event_handler is None: if event_handler is None:
event_handler = default_event_handler event_handler = default_event_handler
if reader_dict is None:
reader_dict = self.default_reader_dict()
__check_train_args__(**locals()) __check_train_args__(**locals())
updater = self.__optimizer__.create_local_updater() updater = self.__optimizer__.create_local_updater()
...@@ -81,9 +88,7 @@ class SGD(): ...@@ -81,9 +88,7 @@ class SGD():
pass_evaluator = self.__gradient_machine__.makeEvaluator() pass_evaluator = self.__gradient_machine__.makeEvaluator()
assert isinstance(pass_evaluator, api.Evaluator) assert isinstance(pass_evaluator, api.Evaluator)
out_args = api.Arguments.createArguments(0) out_args = api.Arguments.createArguments(0)
feeder = DataFeeder(self.__data_types__, feeding)
feeder = DataFeeder(self.__data_types__, reader_dict)
for pass_id in xrange(num_passes): for pass_id in xrange(num_passes):
event_handler(v2_event.BeginPass(pass_id)) event_handler(v2_event.BeginPass(pass_id))
pass_evaluator.start() pass_evaluator.start()
...@@ -101,7 +106,7 @@ class SGD(): ...@@ -101,7 +106,7 @@ class SGD():
for each_param in self.__gradient_machine__.getNonStaticParameters( for each_param in self.__gradient_machine__.getNonStaticParameters(
): ):
updater.update(each_param) updater.update(each_param)
cost_sum = out_args.sumCosts() cost_sum = out_args.sum()
cost = cost_sum / len(data_batch) cost = cost_sum / len(data_batch)
updater.finishBatch(cost) updater.finishBatch(cost)
batch_evaluator.finish() batch_evaluator.finish()
...@@ -117,17 +122,8 @@ class SGD(): ...@@ -117,17 +122,8 @@ class SGD():
event_handler(v2_event.EndPass(pass_id, evaluator=pass_evaluator)) event_handler(v2_event.EndPass(pass_id, evaluator=pass_evaluator))
self.__gradient_machine__.finish() self.__gradient_machine__.finish()
def default_reader_dict(self): def test(self, reader, feeding=None):
reader_dict = dict() feeder = DataFeeder(self.__data_types__, feeding)
for i, tp in enumerate(self.__data_types__):
reader_dict[tp[0]] = i
return reader_dict
def test(self, reader, reader_dict=None):
if reader_dict is None:
reader_dict = self.default_reader_dict()
feeder = DataFeeder(self.__data_types__, reader_dict)
evaluator = self.__gradient_machine__.makeEvaluator() evaluator = self.__gradient_machine__.makeEvaluator()
out_args = api.Arguments.createArguments(0) out_args = api.Arguments.createArguments(0)
evaluator.start() evaluator.start()
...@@ -137,7 +133,7 @@ class SGD(): ...@@ -137,7 +133,7 @@ class SGD():
num_samples += len(data_batch) num_samples += len(data_batch)
self.__gradient_machine__.forward( self.__gradient_machine__.forward(
feeder(data_batch), out_args, api.PASS_TEST) feeder(data_batch), out_args, api.PASS_TEST)
total_cost += out_args.sumCosts() total_cost += out_args.sum()
self.__gradient_machine__.eval(evaluator) self.__gradient_machine__.eval(evaluator)
evaluator.finish() evaluator.finish()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册