提交 ca62c104 编写于 作者: Y Yu Yang 提交者: GitHub

Merge pull request #1564 from reyoung/feature/rename_reader_dict_to_feeding

Feature/rename reader dict to feeding
......@@ -13,8 +13,9 @@
# limitations under the License
import sys
import paddle.v2 as paddle
from api_v2_vgg import vgg_bn_drop
from api_v2_resnet import resnet_cifar10
......@@ -23,7 +24,7 @@ def main():
classdim = 10
# PaddlePaddle init
paddle.init(use_gpu=True, trainer_count=1)
paddle.init(use_gpu=False, trainer_count=1)
image = paddle.layer.data(
name="image", type=paddle.data_type.dense_vector(datadim))
......@@ -68,8 +69,8 @@ def main():
result = trainer.test(
reader=paddle.batch(
paddle.dataset.cifar.test10(), batch_size=128),
reader_dict={'image': 0,
'label': 1})
feeding={'image': 0,
'label': 1})
print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
# Create trainer
......@@ -83,8 +84,8 @@ def main():
batch_size=128),
num_passes=5,
event_handler=event_handler,
reader_dict={'image': 0,
'label': 1})
feeding={'image': 0,
'label': 1})
if __name__ == '__main__':
......
......@@ -30,26 +30,26 @@ def main():
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
print "Pass %d, Batch %d, Cost %f" % (
event.pass_id, event.batch_id, event.cost)
if isinstance(event, paddle.event.EndPass):
result = trainer.test(
reader=paddle.reader.batched(
uci_housing.test(), batch_size=2),
reader_dict={'x': 0,
if (event.pass_id + 1) % 10 == 0:
result = trainer.test(
reader=paddle.batch(
uci_housing.test(), batch_size=2),
feeding={'x': 0,
'y': 1})
if event.pass_id % 10 == 0:
print "Test %d, %s" % (event.pass_id, result.metrics)
print "Test %d, %.2f" % (event.pass_id, result.cost)
# training
trainer.train(
reader=paddle.reader.batched(
reader=paddle.batch(
paddle.reader.shuffle(
uci_housing.train(), buf_size=500),
batch_size=2),
reader_dict={'x': 0,
'y': 1},
feeding={'x': 0,
'y': 1},
event_handler=event_handler,
num_passes=30)
......
......@@ -92,7 +92,7 @@ def main():
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 1000 == 0:
result = trainer.test(reader=paddle.reader.batched(
result = trainer.test(reader=paddle.batch(
paddle.dataset.mnist.test(), batch_size=256))
print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
......@@ -103,7 +103,7 @@ def main():
parameters.to_tar(f)
elif isinstance(event, paddle.event.EndPass):
result = trainer.test(reader=paddle.reader.batched(
result = trainer.test(reader=paddle.batch(
paddle.dataset.mnist.test(), batch_size=128))
print "Test with Pass %d, Cost %f, %s\n" % (
event.pass_id, result.cost, result.metrics)
......
......@@ -163,11 +163,11 @@ def main():
update_equation=optimizer)
parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))
trn_reader = paddle.reader.batched(
trn_reader = paddle.batch(
paddle.reader.shuffle(
conll05.test(), buf_size=8192), batch_size=10)
reader_dict = {
feeding = {
'word_data': 0,
'ctx_n2_data': 1,
'ctx_n1_data': 2,
......@@ -183,7 +183,7 @@ def main():
reader=trn_reader,
event_handler=event_handler,
num_passes=10000,
reader_dict=reader_dict)
feeding=feeding)
if __name__ == '__main__':
......
......@@ -18,11 +18,7 @@ from paddle.trainer_config_helpers.poolings import MaxPooling
import paddle.v2 as paddle
def convolution_net(input_dim,
class_dim=2,
emb_dim=128,
hid_dim=128,
is_predict=False):
def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128):
data = paddle.layer.data("word",
paddle.data_type.integer_value_sequence(input_dim))
emb = paddle.layer.embedding(input=data, size=emb_dim)
......@@ -42,8 +38,7 @@ def stacked_lstm_net(input_dim,
class_dim=2,
emb_dim=128,
hid_dim=512,
stacked_num=3,
is_predict=False):
stacked_num=3):
"""
A Wrapper for sentiment classification task.
This network uses bi-directional recurrent network,
......@@ -110,7 +105,7 @@ def stacked_lstm_net(input_dim,
if __name__ == '__main__':
# init
paddle.init(use_gpu=True, trainer_count=4)
paddle.init(use_gpu=False, trainer_count=4)
# network config
print 'load dictionary...'
......@@ -143,11 +138,11 @@ if __name__ == '__main__':
sys.stdout.flush()
if isinstance(event, paddle.event.EndPass):
result = trainer.test(
reader=paddle.reader.batched(
reader=paddle.batch(
lambda: paddle.dataset.imdb.test(word_dict),
batch_size=128),
reader_dict={'word': 0,
'label': 1})
feeding={'word': 0,
'label': 1})
print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
# create trainer
......@@ -156,11 +151,11 @@ if __name__ == '__main__':
update_equation=adam_optimizer)
trainer.train(
reader=paddle.reader.batched(
reader=paddle.batch(
paddle.reader.shuffle(
lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000),
batch_size=100),
event_handler=event_handler,
reader_dict={'word': 0,
'label': 1},
feeding={'word': 0,
'label': 1},
num_passes=10)
......@@ -80,13 +80,13 @@ def main():
update_equation=optimizer)
# define data reader
reader_dict = {
feeding = {
'source_language_word': 0,
'target_language_word': 1,
'target_language_next_word': 2
}
wmt14_reader = paddle.reader.batched(
wmt14_reader = paddle.batch(
paddle.reader.shuffle(
train_reader("data/pre-wmt14/train/train"), buf_size=8192),
batch_size=5)
......@@ -103,7 +103,7 @@ def main():
reader=wmt14_reader,
event_handler=event_handler,
num_passes=10000,
reader_dict=reader_dict)
feeding=feeding)
if __name__ == '__main__':
......
......@@ -14,11 +14,18 @@
from py_paddle import DataProviderConverter
import data_type
import paddle.trainer.PyDataProvider2 as pydp2
__all__ = ['DataFeeder']
def default_feeding_map(data_types):
reader_dict = dict()
for i, tp in enumerate(data_types):
reader_dict[tp[0]] = i
return reader_dict
class DataFeeder(DataProviderConverter):
"""
DataFeeder converts the data returned by paddle.reader into a data structure
......@@ -60,16 +67,21 @@ class DataFeeder(DataProviderConverter):
:type data_types: list
:param reader_dict: A dictionary to specify the position of each data
in the input data.
:type reader_dict: dict
:type feeding: dict
"""
def __init__(self, data_types, reader_dict):
def __init__(self, data_types, feeding=None):
self.input_names = []
input_types = []
self.reader_dict = reader_dict
if feeding is None:
feeding = default_feeding_map(data_types)
self.feeding = feeding
for each in data_types:
self.input_names.append(each[0])
assert isinstance(each[1], data_type.InputType)
if not isinstance(each[1], pydp2.InputType):
raise TypeError("second item in each data_type should be an "
"InputType")
input_types.append(each[1])
DataProviderConverter.__init__(self, input_types)
......@@ -90,7 +102,7 @@ class DataFeeder(DataProviderConverter):
for each in data:
reorder = []
for name in self.input_names:
reorder.append(each[self.reader_dict[name]])
reorder.append(each[self.feeding[name]])
retv.append(reorder)
return retv
......
......@@ -21,10 +21,8 @@ class Inference(object):
self.__gradient_machine__ = gm
self.__data_types__ = topo.data_type()
def iter_infer(self, reader, reader_dict=None):
if reader_dict is None:
reader_dict = self.default_reader_dict()
feeder = DataFeeder(self.__data_types__, reader_dict)
def iter_infer(self, reader, feeding=None):
feeder = DataFeeder(self.__data_types__, feeding)
self.__gradient_machine__.start()
for data_batch in reader():
yield self.__gradient_machine__.forwardTest(feeder(data_batch))
......@@ -47,13 +45,7 @@ class Inference(object):
else:
return retv
def default_reader_dict(self):
reader_dict = dict()
for i, tp in enumerate(self.__data_types__):
reader_dict[tp[0]] = i
return reader_dict
def infer(output, parameters, reader, reader_dict=None, field='value'):
def infer(output, parameters, reader, feeding=None, field='value'):
inferer = Inference(output=output, parameters=parameters)
return inferer.infer(field=field, reader=reader, reader_dict=reader_dict)
return inferer.infer(field=field, reader=reader, feeding=feeding)
......@@ -61,7 +61,7 @@ class SGD(object):
self.__gradient_machine__.randParameters()
parameters.append_gradient_machine(gm)
def train(self, reader, num_passes=1, event_handler=None, reader_dict=None):
def train(self, reader, num_passes=1, event_handler=None, feeding=None):
"""
Training method. Will train num_passes of input data.
......@@ -70,14 +70,13 @@ class SGD(object):
:param event_handler: Event handler. A method will be invoked when event
occurred.
:type event_handler: (BaseEvent) => None
:param feeding: Feeding is a map of neural network input name and array
index that reader returns.
:type feeding: dict
:return:
"""
if event_handler is None:
event_handler = default_event_handler
if reader_dict is None:
reader_dict = self.default_reader_dict()
__check_train_args__(**locals())
updater = self.__optimizer__.create_local_updater()
......@@ -89,9 +88,7 @@ class SGD(object):
pass_evaluator = self.__gradient_machine__.makeEvaluator()
assert isinstance(pass_evaluator, api.Evaluator)
out_args = api.Arguments.createArguments(0)
feeder = DataFeeder(self.__data_types__, reader_dict)
feeder = DataFeeder(self.__data_types__, feeding)
for pass_id in xrange(num_passes):
event_handler(v2_event.BeginPass(pass_id))
pass_evaluator.start()
......@@ -125,17 +122,8 @@ class SGD(object):
event_handler(v2_event.EndPass(pass_id, evaluator=pass_evaluator))
self.__gradient_machine__.finish()
def default_reader_dict(self):
reader_dict = dict()
for i, tp in enumerate(self.__data_types__):
reader_dict[tp[0]] = i
return reader_dict
def test(self, reader, reader_dict=None):
if reader_dict is None:
reader_dict = self.default_reader_dict()
feeder = DataFeeder(self.__data_types__, reader_dict)
def test(self, reader, feeding=None):
feeder = DataFeeder(self.__data_types__, feeding)
evaluator = self.__gradient_machine__.makeEvaluator()
out_args = api.Arguments.createArguments(0)
evaluator.start()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册