diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py index 575a32b3222a83afa0f01bef59037dca102773e7..9b7ebde5007047e34da9274bf8165cfa527e2cf1 100644 --- a/demo/mnist/api_train_v2.py +++ b/demo/mnist/api_train_v2.py @@ -1,6 +1,59 @@ import paddle.v2 as paddle +def softmax_regression(img): + predict = paddle.layer.fc(input=img, + size=10, + act=paddle.activation.Softmax()) + return predict + + +def multilayer_perceptron(img): + # The first fully-connected layer + hidden1 = paddle.layer.fc(input=img, size=128, act=paddle.activation.Relu()) + # The second fully-connected layer and the according activation function + hidden2 = paddle.layer.fc(input=hidden1, + size=64, + act=paddle.activation.Relu()) + # The thrid fully-connected layer, note that the hidden size should be 10, + # which is the number of unique digits + predict = paddle.layer.fc(input=hidden2, + size=10, + act=paddle.activation.Softmax()) + return predict + + +def convolutional_neural_network(img): + # first conv layer + conv_pool_1 = paddle.networks.simple_img_conv_pool( + input=img, + filter_size=5, + num_filters=20, + num_channel=1, + pool_size=2, + pool_stride=2, + act=paddle.activation.Tanh()) + # second conv layer + conv_pool_2 = paddle.networks.simple_img_conv_pool( + input=conv_pool_1, + filter_size=5, + num_filters=50, + num_channel=20, + pool_size=2, + pool_stride=2, + act=paddle.activation.Tanh()) + # The first fully-connected layer + fc1 = paddle.layer.fc(input=conv_pool_2, + size=128, + act=paddle.activation.Tanh()) + # The softmax layer, note that the hidden size should be 10, + # which is the number of unique digits + predict = paddle.layer.fc(input=fc1, + size=10, + act=paddle.activation.Softmax()) + return predict + + def main(): paddle.init(use_gpu=False, trainer_count=1) @@ -9,46 +62,58 @@ def main(): name='pixel', type=paddle.data_type.dense_vector(784)) label = paddle.layer.data( name='label', type=paddle.data_type.integer_value(10)) - hidden1 = paddle.layer.fc(input=images, size=200) - hidden2 = paddle.layer.fc(input=hidden1, size=200) - inference = paddle.layer.fc(input=hidden2, - size=10, - act=paddle.activation.Softmax()) - cost = paddle.layer.classification_cost(input=inference, label=label) + + # Here we can build the prediction network in different ways. Please + # choose one by uncomment corresponding line. + predict = softmax_regression(images) + #predict = multilayer_perceptron(images) + #predict = convolutional_neural_network(images) + + cost = paddle.layer.classification_cost(input=predict, label=label) parameters = paddle.parameters.create(cost) - adam_optimizer = paddle.optimizer.Adam(learning_rate=0.01) + optimizer = paddle.optimizer.Momentum( + learning_rate=0.1 / 128.0, + momentum=0.9, + regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128)) trainer = paddle.trainer.SGD(cost=cost, parameters=parameters, - update_equation=adam_optimizer) + update_equation=optimizer) + + lists = [] def event_handler(event): if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 1000 == 0: - result = trainer.test(reader=paddle.reader.batched( - paddle.dataset.mnist.test(), batch_size=256)) - - print "Pass %d, Batch %d, Cost %.2f, %s\n" \ - "Testing cost %.2f metrics %s" % ( - event.pass_id, event.batch_id, event.cost, - event.metrics, - result.cost, result.metrics) - else: - pass + if event.batch_id % 100 == 0: + print "Pass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics) + if isinstance(event, paddle.event.EndPass): + result = trainer.test(reader=paddle.reader.batched( + paddle.dataset.mnist.test(), batch_size=128)) + print "Test with Pass %d, Cost %f, %s\n" % ( + event.pass_id, result.cost, result.metrics) + lists.append((event.pass_id, result.cost, + result.metrics['classification_error_evaluator'])) trainer.train( reader=paddle.reader.batched( paddle.reader.shuffle( paddle.dataset.mnist.train(), buf_size=8192), - batch_size=32), - event_handler=event_handler) + batch_size=128), + event_handler=event_handler, + num_passes=100) + + # find the best pass + best = sorted(lists, key=lambda list: float(list[1]))[0] + print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1]) + print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100) # output is a softmax layer. It returns probabilities. # Shape should be (100, 10) probs = paddle.infer( - output=inference, + output=predict, parameters=parameters, reader=paddle.reader.batched( paddle.reader.firstn( diff --git a/python/paddle/v2/event.py b/python/paddle/v2/event.py index a429e36b63c9e812332673b66f4d8b99f3303cf8..8cbf9b9b1faac0ee0875297756853e632e4f2498 100644 --- a/python/paddle/v2/event.py +++ b/python/paddle/v2/event.py @@ -53,8 +53,9 @@ class EndPass(WithMetric): Event On One Pass Training Complete. """ - def __init__(self, pass_id, evaluator): + def __init__(self, pass_id, cost, evaluator): self.pass_id = pass_id + self.cost = cost WithMetric.__init__(self, evaluator)