From 0116bc8dd26182b2f04322a100e1dd52a978e49e Mon Sep 17 00:00:00 2001 From: wwhu Date: Tue, 13 Jun 2017 19:05:14 +0800 Subject: [PATCH] add infer.py and flower dataset --- image_classification/README.md | 88 ++++++++++++++++++++++++++-------- image_classification/infer.py | 83 ++++++++++++++++++++++++++++++++ image_classification/resnet.py | 32 ++++++------- image_classification/train.py | 15 +++--- 4 files changed, 176 insertions(+), 42 deletions(-) create mode 100644 image_classification/infer.py diff --git a/image_classification/README.md b/image_classification/README.md index 39167fa1..acb8b451 100644 --- a/image_classification/README.md +++ b/image_classification/README.md @@ -3,20 +3,6 @@ 这里将介绍如何在PaddlePaddle下使用AlexNet、VGG、GoogLeNet和ResNet模型进行图像分类。图像分类问题的描述和这四种模型的介绍可以参考[PaddlePaddle book](https://github.com/PaddlePaddle/book/tree/develop/03.image_classification)。 -## 数据格式 -reader.py定义了数据格式,它读取一个图像列表文件,并从中解析出图像路径和类别标签。 - -图像列表文件是一个文本文件,其中每一行由一个图像路径和类别标签构成,二者以跳格符(Tab)隔开。类别标签用整数表示,其最小值为0。下面给出一个图像列表文件的片段示例: - -``` -dataset_100/train_images/n03982430_23191.jpeg 1 -dataset_100/train_images/n04461696_23653.jpeg 7 -dataset_100/train_images/n02441942_3170.jpeg 8 -dataset_100/train_images/n03733281_31716.jpeg 2 -dataset_100/train_images/n03424325_240.jpeg 0 -dataset_100/train_images/n02643566_75.jpeg 8 -``` - ## 训练模型 ### 初始化 @@ -25,14 +11,14 @@ dataset_100/train_images/n02643566_75.jpeg 8 ```python import gzip +import paddle.v2.dataset.flowers as flowers import paddle.v2 as paddle import reader import vgg import resnet import alexnet import googlenet -import argparse -import os + # PaddlePaddle init paddle.init(use_gpu=False, trainer_count=1) @@ -44,7 +30,7 @@ paddle.init(use_gpu=False, trainer_count=1) ```python DATA_DIM = 3 * 224 * 224 -CLASS_DIM = 100 +CLASS_DIM = 102 BATCH_SIZE = 128 image = paddle.layer.data( @@ -128,9 +114,35 @@ optimizer = paddle.optimizer.Momentum( $$ lr = lr_{0} * a^ {\lfloor \frac{n}{ b}\rfloor} $$ -### 定义数据读取方法和事件处理程序 +### 定义数据读取 + 
+首先以[花卉数据](http://www.robots.ox.ac.uk/~vgg/data/flowers/102/index.html)为例说明如何定义输入。下面的代码定义了花卉数据训练集和验证集的输入: + +```python +train_reader = paddle.batch( + paddle.reader.shuffle( + flowers.train(), + buf_size=1000), + batch_size=BATCH_SIZE) +test_reader = paddle.batch( + flowers.valid(), + batch_size=BATCH_SIZE) +``` + +若需要使用其他数据,则需要先建立图像列表文件。`reader.py`定义了这种文件的读取方式,它从图像列表文件中解析出图像路径和类别标签。 + +图像列表文件是一个文本文件,其中每一行由一个图像路径和类别标签构成,二者以跳格符(Tab)隔开。类别标签用整数表示,其最小值为0。下面给出一个图像列表文件的片段示例: -读取数据时需要分别指定训练集和验证集的图像列表文件,这里假设这两个文件分别为`train.list`和`val.list`。 +``` +dataset_100/train_images/n03982430_23191.jpeg 1 +dataset_100/train_images/n04461696_23653.jpeg 7 +dataset_100/train_images/n02441942_3170.jpeg 8 +dataset_100/train_images/n03733281_31716.jpeg 2 +dataset_100/train_images/n03424325_240.jpeg 0 +dataset_100/train_images/n02643566_75.jpeg 8 +``` + +训练时需要分别指定训练集和验证集的图像列表文件。这里假设这两个文件分别为`train.list`和`val.list`,数据读取方式如下: ```python train_reader = paddle.batch( paddle.reader.shuffle( reader.train_reader('train.list'), @@ -141,7 +153,10 @@ train_reader = paddle.batch( test_reader = paddle.batch( reader.train_reader('val.list'), batch_size=BATCH_SIZE) +``` +### 定义事件处理程序 +```python # End batch and end pass event handler def event_handler(event): if isinstance(event, paddle.event.EndIteration): @@ -185,3 +200,38 @@ trainer = paddle.trainer.SGD( trainer.train( reader=train_reader, num_passes=200, event_handler=event_handler) ``` + +## 应用模型 +模型训练好后,可以使用下面的代码预测给定图片的类别。 + +```python +# load parameters +with gzip.open('params_pass_10.tar.gz', 'r') as f: + parameters = paddle.parameters.Parameters.from_tar(f) + +def load_image(file): + im = Image.open(file) + im = im.resize((224, 224), Image.ANTIALIAS) + im = np.array(im).astype(np.float32) + # The storage order of the loaded image is W(width), + # H(height), C(channel). PaddlePaddle requires + # the CHW order, so transpose them. + im = im.transpose((2, 0, 1)) # CHW + # In the training phase, the channel order of CIFAR + # image is B(Blue), G(green), R(Red). But PIL opens + # image in RGB mode. 
It must swap the channel order. + im = im[(2, 1, 0), :, :] # BGR + im = im.flatten() + im = im / 255.0 + return im + +file_list = [line.strip() for line in open(image_list_file)] +test_data = [(load_image(image_file),) for image_file in file_list] +probs = paddle.infer( + output_layer=out, parameters=parameters, input=test_data) +lab = np.argsort(-probs) +for file_name, result in zip(file_list, lab): + print "Label of %s is: %d" % (file_name, result[0]) +``` + +首先从文件中加载训练好的模型(代码里以第10轮迭代的结果为例),然后读取`image_list_file`中的图像。`image_list_file`是一个文本文件,每一行为一个图像路径。`load_image`是一个加载图像的函数。代码使用`paddle.infer`判断`image_list_file`中每个图像的类别,并进行输出。 diff --git a/image_classification/infer.py b/image_classification/infer.py new file mode 100644 index 00000000..c48a2933 --- /dev/null +++ b/image_classification/infer.py @@ -0,0 +1,83 @@ +import gzip +import paddle.v2 as paddle +import reader +import vgg +import resnet +import alexnet +import googlenet +import argparse +import os +from PIL import Image +import numpy as np + +WIDTH = 224 +HEIGHT = 224 +DATA_DIM = 3 * WIDTH * HEIGHT +CLASS_DIM = 102 + + +def main(): + # parse the argument + parser = argparse.ArgumentParser() + parser.add_argument( + 'data_list', + help='The path of data list file, which consists of one image path per line' + ) + parser.add_argument( + 'model', + help='The model for image classification', + choices=['alexnet', 'vgg13', 'vgg16', 'vgg19', 'resnet', 'googlenet']) + parser.add_argument( + 'params_path', help='The file which stores the parameters') + args = parser.parse_args() + + # PaddlePaddle init + paddle.init(use_gpu=True, trainer_count=1) + + image = paddle.layer.data( + name="image", type=paddle.data_type.dense_vector(DATA_DIM)) + + if args.model == 'alexnet': + out = alexnet.alexnet(image, class_dim=CLASS_DIM) + elif args.model == 'vgg13': + out = vgg.vgg13(image, class_dim=CLASS_DIM) + elif args.model == 'vgg16': + out = vgg.vgg16(image, class_dim=CLASS_DIM) + elif args.model == 'vgg19': + out = 
vgg.vgg19(image, class_dim=CLASS_DIM) + elif args.model == 'resnet': + out = resnet.resnet_imagenet(image, class_dim=CLASS_DIM) + elif args.model == 'googlenet': + out, _, _ = googlenet.googlenet(image, class_dim=CLASS_DIM) + + # load parameters + with gzip.open(args.params_path, 'r') as f: + parameters = paddle.parameters.Parameters.from_tar(f) + + def load_image(file): + im = Image.open(file) + im = im.resize((WIDTH, HEIGHT), Image.ANTIALIAS) + im = np.array(im).astype(np.float32) + # The storage order of the loaded image is W(width), + # H(height), C(channel). PaddlePaddle requires + # the CHW order, so transpose them. + im = im.transpose((2, 0, 1)) # CHW + # In the training phase, the channel order of CIFAR + # image is B(Blue), G(green), R(Red). But PIL opens + # image in RGB mode. It must swap the channel order. + im = im[(2, 1, 0), :, :] # BGR + im = im.flatten() + im = im / 255.0 + return im + + file_list = [line.strip() for line in open(args.data_list)] + test_data = [(load_image(image_file), ) for image_file in file_list] + probs = paddle.infer( + output_layer=out, parameters=parameters, input=test_data) + lab = np.argsort(-probs) + for file_name, result in zip(file_list, lab): + print "Label of %s is: %d" % (file_name, result[0]) + + +if __name__ == '__main__': + main() diff --git a/image_classification/resnet.py b/image_classification/resnet.py index 63bc4409..9c3c46d8 100644 --- a/image_classification/resnet.py +++ b/image_classification/resnet.py @@ -22,36 +22,36 @@ def conv_bn_layer(input, return paddle.layer.batch_norm(input=tmp, act=active_type) -def shortcut(input, n_out, stride, b_projection): - if b_projection: - return conv_bn_layer(input, n_out, 1, stride, 0, +def shortcut(input, ch_in, ch_out, stride): + if ch_in != ch_out: + return conv_bn_layer(input, ch_out, 1, stride, 0, paddle.activation.Linear()) else: return input -def basicblock(input, ch_out, stride, b_projection): +def basicblock(input, ch_in, ch_out, stride): + short = 
shortcut(input, ch_in, ch_out, stride) conv1 = conv_bn_layer(input, ch_out, 3, stride, 1) conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1, paddle.activation.Linear()) - short = shortcut(input, ch_out, stride, b_projection) return paddle.layer.addto( - input=[conv2, short], act=paddle.activation.Relu()) + input=[short, conv2], act=paddle.activation.Relu()) -def bottleneck(input, ch_out, stride, b_projection): +def bottleneck(input, ch_in, ch_out, stride): + short = shortcut(input, ch_in, ch_out * 4, stride) conv1 = conv_bn_layer(input, ch_out, 1, stride, 0) conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1) conv3 = conv_bn_layer(conv2, ch_out * 4, 1, 1, 0, paddle.activation.Linear()) - short = shortcut(input, ch_out * 4, stride, b_projection) return paddle.layer.addto( - input=[conv3, short], act=paddle.activation.Relu()) + input=[short, conv3], act=paddle.activation.Relu()) -def layer_warp(block_func, input, features, count, stride): - conv = block_func(input, features, stride, True) +def layer_warp(block_func, input, ch_in, ch_out, count, stride): + conv = block_func(input, ch_in, ch_out, stride) for i in range(1, count): - conv = block_func(conv, features, 1, False) + conv = block_func(conv, ch_in, ch_out, 1) return conv @@ -67,10 +67,10 @@ def resnet_imagenet(input, depth=50, class_dim=100): conv1 = conv_bn_layer( input, ch_in=3, ch_out=64, filter_size=7, stride=2, padding=3) pool1 = paddle.layer.img_pool(input=conv1, pool_size=3, stride=2) - res1 = layer_warp(block_func, pool1, 64, stages[0], 1) - res2 = layer_warp(block_func, res1, 128, stages[1], 2) - res3 = layer_warp(block_func, res2, 256, stages[2], 2) - res4 = layer_warp(block_func, res3, 512, stages[3], 2) + res1 = layer_warp(block_func, pool1, 64, 64, stages[0], 1) + res2 = layer_warp(block_func, res1, 64, 128, stages[1], 2) + res3 = layer_warp(block_func, res2, 128, 256, stages[2], 2) + res4 = layer_warp(block_func, res3, 256, 512, stages[3], 2) pool2 = paddle.layer.img_pool( input=res4, pool_size=7, stride=1, 
pool_type=paddle.pooling.Avg()) out = paddle.layer.fc( diff --git a/image_classification/train.py b/image_classification/train.py index 36135616..0a3fdb49 100755 --- a/image_classification/train.py +++ b/image_classification/train.py @@ -1,4 +1,5 @@ import gzip +import paddle.v2.dataset.flowers as flowers import paddle.v2 as paddle import reader import vgg @@ -6,19 +7,15 @@ import resnet import alexnet import googlenet import argparse -import os DATA_DIM = 3 * 224 * 224 -CLASS_DIM = 100 +CLASS_DIM = 102 BATCH_SIZE = 128 def main(): # parse the argument parser = argparse.ArgumentParser() - parser.add_argument( - 'data_dir', - help='The data directory which contains train.list and val.list') parser.add_argument( 'model', help='The model for image classification', @@ -71,11 +68,15 @@ def main(): train_reader = paddle.batch( paddle.reader.shuffle( - reader.test_reader(os.path.join(args.data_dir, 'train.list')), + flowers.train(), + # To use other data, replace the above line with: + # reader.test_reader('train.list'), buf_size=1000), batch_size=BATCH_SIZE) test_reader = paddle.batch( - reader.train_reader(os.path.join(args.data_dir, 'val.list')), + flowers.valid(), + # To use other data, replace the above line with: + # reader.train_reader('val.list'), batch_size=BATCH_SIZE) # End batch and end pass event handler -- GitLab