# mnist_v2.py (4.0 KB)
# Last committed by: ying
import os
import sys
import gzip
import logging
import argparse
from PIL import Image
import numpy as np

import paddle.v2 as paddle
from paddle.utils.dump_v2_config import dump_v2_config

# Module-level logger registered under the name "paddle". Its level is set to
# INFO so that the logger.info() progress messages emitted during training
# pass this logger's level check.
logger = logging.getLogger(name="paddle")
logger.setLevel(level=logging.INFO)


def multilayer_perceptron(img, layer_size, lbl_dim):
    """Stack fully connected ReLU layers on ``img`` and add a softmax output.

    Args:
        img: Layer used as the input of the first fully connected layer.
        layer_size: Iterable giving the size of each hidden layer, in order.
            It may be empty, in which case the softmax layer is attached
            directly to ``img``.
        lbl_dim: Size of the final softmax layer.

    Returns:
        The softmax ``paddle.layer.fc`` layer built on top of the stack.
    """
    # Seed the chain with the input layer. Previously ``hidden`` was only
    # bound inside the loop, so an empty ``layer_size`` raised
    # UnboundLocalError when the output layer was constructed.
    hidden = img
    for size in layer_size:
        hidden = paddle.layer.fc(
            input=hidden, size=size, act=paddle.activation.Relu())
    return paddle.layer.fc(
        input=hidden, size=lbl_dim, act=paddle.activation.Softmax())


def network(input_dim=784, lbl_dim=10, is_infer=False):
    """Assemble the MNIST multilayer-perceptron topology.

    Args:
        input_dim: Dimension of the dense vector fed to the 'pixel' data layer.
        lbl_dim: Size of the softmax output; also the value range passed to
            the integer 'label' data layer.
        is_infer: If true, return the prediction layer. Otherwise return the
            classification cost computed against the 'label' data layer.

    Returns:
        The prediction layer (inference) or the classification cost layer
        (training).
    """
    pixel_type = paddle.data_type.dense_vector(input_dim)
    pixel = paddle.layer.data(name='pixel', type=pixel_type)

    # Two hidden layers (128 and 64 units) followed by the softmax output.
    prediction = multilayer_perceptron(
        pixel, layer_size=[128, 64], lbl_dim=lbl_dim)

    # Inference only needs the prediction; no label layer is created.
    if is_infer:
        return prediction

    label_type = paddle.data_type.integer_value(lbl_dim)
    target = paddle.layer.data(name='label', type=label_type)
    return paddle.layer.classification_cost(input=prediction, label=target)


def main(task="train", use_gpu=False, trainer_count=1, save_dir="models"):
    """Train the MNIST network or dump its inference configuration.

    Args:
        task: "train" to run training, or "dump_config" to write the
            inference topology to "trainer_config.bin".
        use_gpu: Forwarded to ``paddle.init`` (training only).
        trainer_count: Forwarded to ``paddle.init`` (training only).
        save_dir: Directory that receives one parameter file per finished
            pass (training only). Missing parent directories are created.

    Raises:
        RuntimeError: If ``task`` is neither "train" nor "dump_config".
        OSError: If ``save_dir`` cannot be created.
    """
    if task == "train":
        # os.makedirs also creates missing parents, so a nested path such as
        # "out/models" works; os.mkdir failed on it.
        if not os.path.isdir(save_dir):
            try:
                os.makedirs(save_dir)
            except OSError:
                # Tolerate a concurrent creation of the same directory, but
                # re-raise real failures (permissions, path is a file, ...).
                if not os.path.isdir(save_dir):
                    raise

        paddle.init(use_gpu=use_gpu, trainer_count=trainer_count)
        cost = network()
        parameters = paddle.parameters.create(cost)
        optimizer = paddle.optimizer.Momentum(
            learning_rate=0.1 / 128.0,
            momentum=0.9,
            regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128))

        trainer = paddle.trainer.SGD(cost=cost,
                                     parameters=parameters,
                                     update_equation=optimizer)

        def event_handler(event):
            """Log progress every 100 batches and save parameters per pass."""
            if isinstance(event, paddle.event.EndIteration):
                if event.batch_id % 100 == 0:
                    # Lazy %-style arguments: the message is only formatted
                    # when the record is actually emitted.
                    logger.info("Pass %d, Batch %d, Cost %f, %s",
                                event.pass_id, event.batch_id, event.cost,
                                event.metrics)
            if isinstance(event, paddle.event.EndPass):
                # NOTE(review): the file is gzip-compressed although its name
                # ends in ".tar"; the name is kept unchanged because other
                # tools may rely on it -- confirm before renaming.
                param_path = os.path.join(
                    save_dir, "params_pass_%d.tar" % event.pass_id)
                with gzip.open(param_path, "w") as f:
                    trainer.save_parameter_to_tar(f)

        trainer.train(
            reader=paddle.batch(
                paddle.reader.shuffle(
                    paddle.dataset.mnist.train(), buf_size=8192),
                batch_size=128),
            event_handler=event_handler,
            num_passes=5)
    elif task == "dump_config":
        predict = network(is_infer=True)
        dump_v2_config(predict, "trainer_config.bin", True)
    else:
        raise RuntimeError(("Error value for parameter task. "
                            "Available options are: train and dump_config."))


def _parse_bool_flag(value):
    """Convert a command-line string such as "true" or "0" into a bool."""
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ("1", "true", "t", "yes", "y", "on"):
        return True
    # The empty string stays False, matching what ``bool("")`` returned.
    if text in ("", "0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got %r" % (value, ))


def parse_cmd(argv=None):
    """Parse the command-line options of the demo.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads the arguments from ``sys.argv``.

    Returns:
        An ``argparse.Namespace`` with the attributes ``task``, ``use_gpu``,
        ``trainer_count`` and ``save_dir``.
    """
    parser = argparse.ArgumentParser(
        description="PaddlePaddle MNIST demo for CAPI.")
    parser.add_argument(
        "--task",
        type=str,
        required=False,
        help=("A string indicating the task type. "
              "Available options are: \"train\", \"dump_config\"."),
        default="train")
    parser.add_argument(
        "--use_gpu",
        # ``type=bool`` would turn every non-empty string, including
        # "False", into True; parse the text explicitly instead.
        type=_parse_bool_flag,
        help=("A bool flag indicating whether to use GPU device or not."),
        default=False)
    parser.add_argument(
        "--trainer_count",
        type=int,
        help=("This parameter is only used in training task. It indicates "
              "how many computing threads are created in training."),
        default=1)
    parser.add_argument(
        "--save_dir",
        type=str,
        help=("This parameter is only used in training task. It indicates "
              "path of the directory to save the trained models."),
        default="models")
    return parser.parse_args(argv)


if __name__ == "__main__":
    # Script entry point: forward the parsed command-line options to main().
    args = parse_cmd()
    main(
        task=args.task,
        use_gpu=args.use_gpu,
        trainer_count=args.trainer_count,
        save_dir=args.save_dir)