From 26445368a2fb2d95598d80b5fad0d880c04bd0da Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 6 Mar 2017 15:45:19 +0800 Subject: [PATCH] Rename reader_dict to feeding * Also fix some other bugs. * Fix #1495 --- demo/image_classification/api_v2_train.py | 13 ++++++----- demo/introduction/api_train_v2.py | 22 ++++++++--------- demo/mnist/api_train_v2.py | 4 ++-- demo/semantic_role_labeling/api_train_v2.py | 6 ++--- demo/sentiment/train_v2.py | 23 +++++++----------- demo/seqToseq/api_train_v2.py | 6 ++--- python/paddle/v2/data_feeder.py | 24 ++++++++++++++----- python/paddle/v2/inference.py | 16 ++++--------- python/paddle/v2/trainer.py | 26 ++++++--------------- 9 files changed, 64 insertions(+), 76 deletions(-) diff --git a/demo/image_classification/api_v2_train.py b/demo/image_classification/api_v2_train.py index e0fc0e04bb..7134fa61e8 100644 --- a/demo/image_classification/api_v2_train.py +++ b/demo/image_classification/api_v2_train.py @@ -13,8 +13,9 @@ # limitations under the License import sys + import paddle.v2 as paddle -from api_v2_vgg import vgg_bn_drop + from api_v2_resnet import resnet_cifar10 @@ -23,7 +24,7 @@ def main(): classdim = 10 # PaddlePaddle init - paddle.init(use_gpu=True, trainer_count=1) + paddle.init(use_gpu=False, trainer_count=1) image = paddle.layer.data( name="image", type=paddle.data_type.dense_vector(datadim)) @@ -68,8 +69,8 @@ def main(): result = trainer.test( reader=paddle.batch( paddle.dataset.cifar.test10(), batch_size=128), - reader_dict={'image': 0, - 'label': 1}) + feeding={'image': 0, + 'label': 1}) print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) # Create trainer @@ -83,8 +84,8 @@ def main(): batch_size=128), num_passes=5, event_handler=event_handler, - reader_dict={'image': 0, - 'label': 1}) + feeding={'image': 0, + 'label': 1}) if __name__ == '__main__': diff --git a/demo/introduction/api_train_v2.py b/demo/introduction/api_train_v2.py index 75dd65f9fc..84125c3b4b 100644 --- a/demo/introduction/api_train_v2.py +++ b/demo/introduction/api_train_v2.py @@ -30,26 +30,26 @@ def main(): def event_handler(event): if isinstance(event, paddle.event.EndIteration): if event.batch_id % 100 == 0: - print "Pass %d, Batch %d, Cost %f, %s" % ( - event.pass_id, event.batch_id, event.cost, event.metrics) + print "Pass %d, Batch %d, Cost %f" % ( + event.pass_id, event.batch_id, event.cost) if isinstance(event, paddle.event.EndPass): - result = trainer.test( - reader=paddle.reader.batched( - uci_housing.test(), batch_size=2), - reader_dict={'x': 0, + if (event.pass_id + 1) % 10 == 0: + result = trainer.test( + reader=paddle.batch( + uci_housing.test(), batch_size=2), + feeding={'x': 0, 'y': 1}) - if event.pass_id % 10 == 0: - print "Test %d, %s" % (event.pass_id, result.metrics) + print "Test %d, %.2f" % (event.pass_id, result.cost) # training trainer.train( - reader=paddle.reader.batched( + reader=paddle.batch( paddle.reader.shuffle( uci_housing.train(), buf_size=500), batch_size=2), - reader_dict={'x': 0, - 'y': 1}, + feeding={'x': 0, + 'y': 1}, event_handler=event_handler, num_passes=30) diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py index 072b2a08da..68761be80f 100644 --- a/demo/mnist/api_train_v2.py +++ b/demo/mnist/api_train_v2.py @@ -92,7 +92,7 @@ def main(): def event_handler(event): if isinstance(event, paddle.event.EndIteration): if event.batch_id % 1000 == 0: - result = trainer.test(reader=paddle.reader.batched( + result = trainer.test(reader=paddle.batch( paddle.dataset.mnist.test(), batch_size=256)) print "Pass %d, 
Batch %d, Cost %f, %s, Testing metrics %s" % ( @@ -103,7 +103,7 @@ def main(): parameters.to_tar(f) elif isinstance(event, paddle.event.EndPass): - result = trainer.test(reader=paddle.reader.batched( + result = trainer.test(reader=paddle.batch( paddle.dataset.mnist.test(), batch_size=128)) print "Test with Pass %d, Cost %f, %s\n" % ( event.pass_id, result.cost, result.metrics) diff --git a/demo/semantic_role_labeling/api_train_v2.py b/demo/semantic_role_labeling/api_train_v2.py index 15db922b97..036cad4b0a 100644 --- a/demo/semantic_role_labeling/api_train_v2.py +++ b/demo/semantic_role_labeling/api_train_v2.py @@ -163,11 +163,11 @@ def main(): update_equation=optimizer) parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32)) - trn_reader = paddle.reader.batched( + trn_reader = paddle.batch( paddle.reader.shuffle( conll05.test(), buf_size=8192), batch_size=10) - reader_dict = { + feeding = { 'word_data': 0, 'ctx_n2_data': 1, 'ctx_n1_data': 2, @@ -183,7 +183,7 @@ def main(): reader=trn_reader, event_handler=event_handler, num_passes=10000, - reader_dict=reader_dict) + feeding=feeding) if __name__ == '__main__': diff --git a/demo/sentiment/train_v2.py b/demo/sentiment/train_v2.py index 3a266e74ea..fd7243cbe6 100644 --- a/demo/sentiment/train_v2.py +++ b/demo/sentiment/train_v2.py @@ -18,11 +18,7 @@ from paddle.trainer_config_helpers.poolings import MaxPooling import paddle.v2 as paddle -def convolution_net(input_dim, - class_dim=2, - emb_dim=128, - hid_dim=128, - is_predict=False): +def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128): data = paddle.layer.data("word", paddle.data_type.integer_value_sequence(input_dim)) emb = paddle.layer.embedding(input=data, size=emb_dim) @@ -42,8 +38,7 @@ def stacked_lstm_net(input_dim, class_dim=2, emb_dim=128, hid_dim=512, - stacked_num=3, - is_predict=False): + stacked_num=3): """ A Wrapper for sentiment classification task. This network uses bi-directional recurrent network, @@ -110,7 +105,7 @@ def stacked_lstm_net(input_dim, if __name__ == '__main__': # init - paddle.init(use_gpu=True, trainer_count=4) + paddle.init(use_gpu=False, trainer_count=4) # network config print 'load dictionary...' 
@@ -143,11 +138,11 @@ if __name__ == '__main__': sys.stdout.flush() if isinstance(event, paddle.event.EndPass): result = trainer.test( - reader=paddle.reader.batched( + reader=paddle.batch( lambda: paddle.dataset.imdb.test(word_dict), batch_size=128), - reader_dict={'word': 0, - 'label': 1}) + feeding={'word': 0, + 'label': 1}) print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) # create trainer @@ -156,11 +151,11 @@ if __name__ == '__main__': update_equation=adam_optimizer) trainer.train( - reader=paddle.reader.batched( + reader=paddle.batch( paddle.reader.shuffle( lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000), batch_size=100), event_handler=event_handler, - reader_dict={'word': 0, - 'label': 1}, + feeding={'word': 0, + 'label': 1}, num_passes=10) diff --git a/demo/seqToseq/api_train_v2.py b/demo/seqToseq/api_train_v2.py index a5f59ec379..5b7506b152 100644 --- a/demo/seqToseq/api_train_v2.py +++ b/demo/seqToseq/api_train_v2.py @@ -80,13 +80,13 @@ def main(): update_equation=optimizer) # define data reader - reader_dict = { + feeding = { 'source_language_word': 0, 'target_language_word': 1, 'target_language_next_word': 2 } - wmt14_reader = paddle.reader.batched( + wmt14_reader = paddle.batch( paddle.reader.shuffle( train_reader("data/pre-wmt14/train/train"), buf_size=8192), batch_size=5) @@ -103,7 +103,7 @@ def main(): reader=wmt14_reader, event_handler=event_handler, num_passes=10000, - reader_dict=reader_dict) + feeding=feeding) if __name__ == '__main__': diff --git a/python/paddle/v2/data_feeder.py b/python/paddle/v2/data_feeder.py index b7465238be..ba77fecf21 100644 --- a/python/paddle/v2/data_feeder.py +++ b/python/paddle/v2/data_feeder.py @@ -14,11 +14,18 @@ from py_paddle import DataProviderConverter -import data_type +import paddle.trainer.PyDataProvider2 as pydp2 __all__ = ['DataFeeder'] +def default_feeding_map(data_types): + reader_dict = dict() + for i, tp in enumerate(data_types): + reader_dict[tp[0]] = i + return reader_dict + + class DataFeeder(DataProviderConverter): """ DataFeeder converts the data returned by paddle.reader into a data structure @@ -60,16 +67,21 @@ class DataFeeder(DataProviderConverter): :type data_types: list :param reader_dict: A dictionary to specify the position of each data in the input data. 
- :type reader_dict: dict + :type feeding: dict """ - def __init__(self, data_types, reader_dict): + def __init__(self, data_types, feeding=None): self.input_names = [] input_types = [] - self.reader_dict = reader_dict + if feeding is None: + feeding = default_feeding_map(data_types) + + self.feeding = feeding for each in data_types: self.input_names.append(each[0]) - assert isinstance(each[1], data_type.InputType) + if not isinstance(each[1], pydp2.InputType): + raise TypeError("second item in each data_type should be an " + "InputType") input_types.append(each[1]) DataProviderConverter.__init__(self, input_types) @@ -90,7 +102,7 @@ class DataFeeder(DataProviderConverter): for each in data: reorder = [] for name in self.input_names: - reorder.append(each[self.reader_dict[name]]) + reorder.append(each[self.feeding[name]]) retv.append(reorder) return retv diff --git a/python/paddle/v2/inference.py b/python/paddle/v2/inference.py index 476fd3fa45..7d889bce7f 100644 --- a/python/paddle/v2/inference.py +++ b/python/paddle/v2/inference.py @@ -21,10 +21,8 @@ class Inference(object): self.__gradient_machine__ = gm self.__data_types__ = topo.data_type() - def iter_infer(self, reader, reader_dict=None): - if reader_dict is None: - reader_dict = self.default_reader_dict() - feeder = DataFeeder(self.__data_types__, reader_dict) + def iter_infer(self, reader, feeding=None): + feeder = DataFeeder(self.__data_types__, feeding) self.__gradient_machine__.start() for data_batch in reader(): yield self.__gradient_machine__.forwardTest(feeder(data_batch)) @@ -47,13 +45,7 @@ class Inference(object): else: return retv - def default_reader_dict(self): - reader_dict = dict() - for i, tp in enumerate(self.__data_types__): - reader_dict[tp[0]] = i - return reader_dict - -def infer(output, parameters, reader, reader_dict=None, field='value'): +def infer(output, parameters, reader, feeding=None, field='value'): inferer = Inference(output=output, parameters=parameters) - return inferer.infer(field=field, reader=reader, reader_dict=reader_dict) + return inferer.infer(field=field, reader=reader, feeding=feeding) diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py index 187abaf9a3..7bd3e2c565 100644 --- a/python/paddle/v2/trainer.py +++ b/python/paddle/v2/trainer.py @@ -61,7 +61,7 @@ class SGD(object): self.__gradient_machine__.randParameters() parameters.append_gradient_machine(gm) - def train(self, reader, num_passes=1, event_handler=None, reader_dict=None): + def train(self, reader, num_passes=1, event_handler=None, feeding=None): """ Training method. Will train num_passes of input data. @@ -70,14 +70,13 @@ class SGD(object): :param event_handler: Event handler. A method will be invoked when event occurred. :type event_handler: (BaseEvent) => None + :param feeding: Feeding is a map of neural network input name and array + index that reader returns. 
+ :type feeding: dict :return: """ if event_handler is None: event_handler = default_event_handler - - if reader_dict is None: - reader_dict = self.default_reader_dict() - __check_train_args__(**locals()) updater = self.__optimizer__.create_local_updater() @@ -89,9 +88,7 @@ class SGD(object): pass_evaluator = self.__gradient_machine__.makeEvaluator() assert isinstance(pass_evaluator, api.Evaluator) out_args = api.Arguments.createArguments(0) - - feeder = DataFeeder(self.__data_types__, reader_dict) - + feeder = DataFeeder(self.__data_types__, feeding) for pass_id in xrange(num_passes): event_handler(v2_event.BeginPass(pass_id)) pass_evaluator.start() @@ -125,17 +122,8 @@ class SGD(object): event_handler(v2_event.EndPass(pass_id, evaluator=pass_evaluator)) self.__gradient_machine__.finish() - def default_reader_dict(self): - reader_dict = dict() - for i, tp in enumerate(self.__data_types__): - reader_dict[tp[0]] = i - return reader_dict - - def test(self, reader, reader_dict=None): - if reader_dict is None: - reader_dict = self.default_reader_dict() - - feeder = DataFeeder(self.__data_types__, reader_dict) + def test(self, reader, feeding=None): + feeder = DataFeeder(self.__data_types__, feeding) evaluator = self.__gradient_machine__.makeEvaluator() out_args = api.Arguments.createArguments(0) evaluator.start() -- GitLab
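
Note on the renamed argument: with this patch, `trainer.train`, `trainer.test`, and `paddle.v2.infer` all accept `feeding` in place of `reader_dict`, and `DataFeeder` now builds a default mapping from the declaration order of `data_types` when `feeding` is omitted (see `default_feeding_map` in `python/paddle/v2/data_feeder.py`). The sketch below only illustrates what the mapping means; the sample values are hypothetical, and the reorder loop mirrors the one added to `DataFeeder` rather than calling into PaddlePaddle itself.

    # Minimal sketch of the `feeding` semantics (hypothetical sample values).
    # Each record yielded by a reader is an indexable sample; `feeding` maps a
    # network input name to the column index of that input within the sample.
    feeding = {'x': 0, 'y': 1}      # same mapping as demo/introduction/api_train_v2.py
    input_names = ['x', 'y']        # order in which the network consumes its inputs

    sample = ([0.3, -1.2], [7.5])   # hypothetical (features, label) record from a reader
    reordered = [sample[feeding[name]] for name in input_names]
    assert reordered == [[0.3, -1.2], [7.5]]

    # When `feeding` is None, DataFeeder falls back to declaration order, i.e.
    # {'x': 0, 'y': 1} for data_types declared as [('x', ...), ('y', ...)].

Call sites are otherwise unchanged: code that previously passed `reader_dict={'x': 0, 'y': 1}` now passes `feeding={'x': 0, 'y': 1}` to `train`, `test`, or `infer`.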