# classify.py

# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os, sys
import cPickle
import numpy as np
from PIL import Image
from optparse import OptionParser

import paddle.utils.image_util as image_util
from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import dense_vector
from paddle.trainer.config_parser import parse_config

import logging
# Let INFO-and-above records through the root logger, and prefix every
# record with its level, timestamp and source location.
logging.getLogger().setLevel(logging.INFO)
logging.basicConfig(
    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')


def vis_square(data, fname):
    """Tile a stack of maps into a roughly square grid and save it as an image.

    Args:
        data: numpy array of shape (n, height, width) or
            (n, height, width, 3). Each (height, width) slice becomes one
            cell of a grid of about sqrt(n) by sqrt(n) cells.
        fname: output path handed to ``matplotlib.pyplot.savefig``.

    The values are min-max normalized to [0, 1] before drawing; a constant
    input is drawn as all zeros instead of producing NaNs.
    """
    # Select the non-interactive Agg backend before pyplot is imported.
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    # normalize data for display
    low = data.min()
    span = data.max() - low
    if span == 0:
        # Every value is identical: dividing would give 0/0.
        data = np.zeros(data.shape, dtype=np.float32)
    else:
        # float() keeps integer inputs from being floor-divided on Python 2.
        data = (data - low) / float(span)

    # force the number of filters to be square
    n = int(np.ceil(np.sqrt(data.shape[0])))
    padding = (
        ((0, n**2 - data.shape[0]), (0, 1),
         (0, 1))  # add some space between filters
        + ((0, 0), ) *
        (data.ndim - 3))  # don't pad the last dimension (if there is one)
    data = np.pad(data, padding, mode='constant',
                  constant_values=1)  # pad with ones (white)

    # tile the filters into an image: (n, n, h, w, ...) -> (n*h, n*w, ...)
    data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3) + tuple(
        range(4, data.ndim + 1)))
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])

    plt.imshow(data, cmap='gray')
    # Hide the axes *before* saving; doing it afterwards has no effect on
    # the file that was already written.
    plt.axis('off')
    plt.savefig(fname)


class ImageClassifier(object):
    """Load a trained Paddle network and run it on single images.

    The constructor parses the network config, initializes Paddle, builds
    the gradient machine and loads its parameters. ``get_data`` turns an
    image file into network input and ``forward`` runs the network.

    NOTE(review): several places hard-code 3 channels (mean reshape, input
    dimension, buffers in ``get_data``) regardless of ``is_color``; a
    grayscale configuration looks unsupported -- confirm before relying on
    ``is_color=False``.
    """

    def __init__(self,
                 train_conf,
                 resize_dim,
                 crop_dim,
                 model_dir=None,
                 use_gpu=True,
                 mean_file=None,
                 oversample=False,
                 is_color=True):
        """Build the network and the input pre-processing pipeline.

        Args:
            train_conf: path of the network config, given to
                ``parse_config``.
            resize_dim: size given to ``image_util.resize_image``; only
                used when ``oversample`` is True.
            crop_dim: side length of the square network input; the input
                slot is a dense vector of 3 * crop_dim * crop_dim values.
            model_dir: directory given to ``loadParameters``. Defaults to
                the directory containing ``train_conf``.
            use_gpu: forwarded (as 0/1) to the config parser and to
                ``initPaddle``.
            mean_file: file readable by ``np.load`` with a ``'mean'``
                entry, reshaped to (3, crop_dim, crop_dim). When None,
                three fixed per-channel means are used instead.
            oversample: when True, ``get_data`` produces several crops via
                ``image_util.oversample``.
            is_color: forwarded to ``image_util.ImageTransformer`` and
                ``image_util.load_image``.
        """
        self.train_conf = train_conf
        self.model_dir = model_dir
        if model_dir is None:
            self.model_dir = os.path.dirname(train_conf)

        self.resize_dim = resize_dim
        self.crop_dims = [crop_dim, crop_dim]
        self.oversample = oversample
        self.is_color = is_color

        self.transformer = image_util.ImageTransformer(is_color=is_color)
        # Images are built as (H, W, 3) arrays in get_data; presumably this
        # moves channels first and reverses the channel order -- confirm
        # against image_util.ImageTransformer.
        self.transformer.set_transpose((2, 0, 1))
        self.transformer.set_channel_swap((2, 1, 0))

        self.mean_file = mean_file
        if self.mean_file is not None:
            mean = np.load(self.mean_file)['mean']
            mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
            self.transformer.set_mean(mean)  # per-pixel mean
        else:
            # No mean file: fall back to one mean value per channel.
            # These three values were calculated from ImageNet.
            self.transformer.set_mean(np.array([103.939, 116.779, 123.68]))

        conf_args = "use_gpu=%d,is_predict=1" % (int(use_gpu))
        conf = parse_config(train_conf, conf_args)
        swig_paddle.initPaddle("--use_gpu=%d" % (int(use_gpu)))
        self.network = swig_paddle.GradientMachine.createFromConfigProto(
            conf.model_config)
        assert isinstance(self.network, swig_paddle.GradientMachine)
        self.network.loadParameters(self.model_dir)

        dim = 3 * self.crop_dims[0] * self.crop_dims[1]
        slots = [dense_vector(dim)]
        self.converter = DataProviderConverter(slots)

    def get_data(self, img_path):
        """Load one image and convert it into network input samples.

        Steps:
        1. load the image from ``img_path``.
        2. either resize it and take oversampled crops (``oversample``), or
           resize it directly to the crop size.
        3. run every resulting image through the transformer and flatten it.

        Args:
            img_path: path of the image file.

        Returns:
            A list with one entry per crop; each entry is a one-element
            list holding the flattened image as a Python list, which is the
            layout handed to the data converter in ``forward``.
        """
        image = image_util.load_image(img_path, self.is_color)
        # Another way to extract oversampled features is to crop and
        # average from a large feature map computed on a large image,
        # which reduces the computation.
        if self.oversample:
            image = image_util.resize_image(image, self.resize_dim)
            image = np.array(image)
            batch = np.zeros(
                (1, image.shape[0], image.shape[1], 3), dtype=np.float32)
            batch[0] = image.astype(np.float32)
            batch = image_util.oversample(batch, self.crop_dims)
        else:
            # Single view: resize straight to the crop size (resize_dim is
            # not used on this path).
            image = image.resize(self.crop_dims, Image.ANTIALIAS)
            batch = np.zeros(
                (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32)
            batch[0] = np.array(image).astype(np.float32)

        data_in = []
        for img in batch:
            flat = self.transformer.transformer(img).flatten()
            data_in.append([flat.tolist()])
        return data_in

    def forward(self, data, output_layer=None):
        """Run the network on ``data``.

        This merges what used to be two ``forward`` definitions (the first
        one was shadowed by the second and could never be called).

        Args:
            data: samples as produced by ``get_data``.
            output_layer: a layer name, a list of layer names, or None.

        Returns:
            When ``output_layer`` is None, whatever
            ``network.forwardTest`` returns. Otherwise a dict mapping each
            requested layer name to that layer's output averaged over
            axis 0.
        """
        in_arg = self.converter(data)
        if output_layer is None:
            return self.network.forwardTest(in_arg)

        self.network.forwardTest(in_arg)
        output = self.network.getLayerOutputs(output_layer)
        if isinstance(output_layer, basestring):
            output_layer = [output_layer]
        # For oversampling, average predictions across crops. Without it
        # the output has a single row, so the mean is still applicable.
        return dict((name, output[name].mean(0)) for name in output_layer)


def option_parser(argv=None):
    """Define the command-line options and parse them.

    Args:
        argv: optional list of argument strings to parse. When None,
            ``OptionParser.parse_args`` falls back to ``sys.argv[1:]``.

    Returns:
        The ``(options, args)`` pair returned by
        ``OptionParser.parse_args``. ``options`` carries ``job_type``,
        ``train_conf``, ``data_file``, ``model_path``, ``cpu_gpu``,
        ``mean`` and ``multi_crop``.
    """
    # The usage line names the options that actually exist below.
    usage = "%prog --conf config --data image --model model_dir [options]"
    parser = OptionParser(usage=usage)
    parser.add_option(
        "--job",
        action="store",
        dest="job_type",
        choices=[
            'predict',
            'extract',
        ],
        default='predict',
        help="The job type. predict: predicting, extract: extract features")
    parser.add_option(
        "--conf",
        action="store",
        dest="train_conf",
        default='models/vgg.py',
        help="network config")
    parser.add_option(
        "--data",
        action="store",
        dest="data_file",
        default='image/dog.png',
        help="path of the input image")
    parser.add_option(
        "--model",
        action="store",
        dest="model_path",
        default=None,
        help="model path")
    # -c and -g share one destination; GPU mode is the default.
    parser.add_option(
        "-c", dest="cpu_gpu", action="store_false", help="Use cpu mode.")
    parser.add_option(
        "-g",
        dest="cpu_gpu",
        default=True,
        action="store_true",
        help="Use gpu mode.")
    parser.add_option(
        "--mean",
        action="store",
        dest="mean",
        default='data/mean.meta',
        help="The mean file.")
    parser.add_option(
        "--multi_crop",
        action="store_true",
        dest="multi_crop",
        default=False,
        help="Whether to use multiple crops on image.")
    return parser.parse_args(argv)


def main():
    """Command-line entry point: classify one image or dump its features.

    Builds an ``ImageClassifier`` for 32x32 inputs from the parsed options,
    then either logs the top-scoring label of the image (``predict``) or
    saves a grid picture of the first convolution layer's output
    (``extract``).
    """
    options, _ = option_parser()
    # Fall back to the stock paths when an option is empty.
    mean_path = options.mean or 'data/mean.meta'
    conf_path = options.train_conf or 'models/vgg.py'
    classifier = ImageClassifier(
        conf_path,
        32,
        32,
        options.model_path,
        use_gpu=options.cpu_gpu,
        mean_file=mean_path,
        oversample=options.multi_crop)
    image_path = options.data_file
    job = options.job_type

    if job == 'predict':
        layer = '__fc_layer_2__'
        samples = classifier.get_data(image_path)
        scores = classifier.forward(samples, layer)[layer]
        # Sort descending by score; the first index is the predicted label.
        ranking = np.argsort(-scores)
        logging.info("Label of %s is: %d", image_path, ranking[0])
        return

    if job == "extract":
        layer = '__conv_0__'
        samples = classifier.get_data(image_path)
        outputs = classifier.forward(samples, layer)
        # View the layer output as 64 maps of 32x32 for plotting.
        maps = outputs[layer].reshape((64, 32, 32))
        vis_square(maps, 'fea_conv0.png')


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()