diff --git a/image_classification/classify.py b/image_classification/classify.py
index b853991b238868efad0dfa5cb780660ace894f67..acd0d3cba5ad8f91ca3edf66edcc18a0fd519400 100644
--- a/image_classification/classify.py
+++ b/image_classification/classify.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import os,sys
+import os, sys
 import cPickle
 import numpy as np
 from PIL import Image
@@ -24,7 +24,8 @@ from paddle.trainer.PyDataProvider2 import dense_vector
 from paddle.trainer.config_parser import parse_config
 import logging
 
-logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
+logging.basicConfig(
+    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
 logging.getLogger().setLevel(logging.INFO)
 
 
@@ -32,24 +33,28 @@ def vis_square(data, fname):
     import matplotlib
     matplotlib.use('Agg')
     import matplotlib.pyplot as plt
-    """Take an array of shape (n, height, width) or (n, height, width, 3) and visualize each (height, width) thing in a grid of size approx. sqrt(n) by sqrt(n)"""
     # normalize data for display
     data = (data - data.min()) / (data.max() - data.min())
 
     # force the number of filters to be square
    n = int(np.ceil(np.sqrt(data.shape[0])))
-    padding = (((0, n ** 2 - data.shape[0]),
-                (0, 1), (0, 1))  # add some space between filters
-               + ((0, 0),) * (data.ndim - 3))  # don't pad the last dimension (if there is one)
-    data = np.pad(data, padding, mode='constant', constant_values=1)  # pad with ones (white)
+    padding = (
+        ((0, n**2 - data.shape[0]), (0, 1),
+         (0, 1))  # add some space between filters
+        + ((0, 0), ) *
+        (data.ndim - 3))  # don't pad the last dimension (if there is one)
+    data = np.pad(data, padding, mode='constant',
+                  constant_values=1)  # pad with ones (white)
 
     # tile the filters into an image
-    data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3) + tuple(range(4, data.ndim + 1)))
+    data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3) + tuple(
+        range(4, data.ndim + 1)))
     data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])
     plt.imshow(data, cmap='gray')
     plt.savefig(fname)
     plt.axis('off')
 
+
 class ImageClassifier():
     def __init__(self,
                  train_conf,
@@ -70,24 +75,25 @@ class ImageClassifier():
         self.oversample = oversample
         self.is_color = is_color
 
-        self.transformer = image_util.ImageTransformer(is_color = is_color)
-        self.transformer.set_transpose((2,0,1))
-        self.transformer.set_channel_swap((2,1,0))
+        self.transformer = image_util.ImageTransformer(is_color=is_color)
+        self.transformer.set_transpose((2, 0, 1))
+        self.transformer.set_channel_swap((2, 1, 0))
 
         self.mean_file = mean_file
         if self.mean_file is not None:
             mean = np.load(self.mean_file)['mean']
             mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
-            self.transformer.set_mean(mean) # mean pixel
+            self.transformer.set_mean(mean)  # mean pixel
         else:
             # if you use three mean value, set like:
             # this three mean value is calculated from ImageNet.
-            self.transformer.set_mean(np.array([103.939,116.779,123.68]))
+            self.transformer.set_mean(np.array([103.939, 116.779, 123.68]))
 
         conf_args = "use_gpu=%d,is_predict=1" % (int(use_gpu))
         conf = parse_config(train_conf, conf_args)
         swig_paddle.initPaddle("--use_gpu=%d" % (int(use_gpu)))
-        self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+        self.network = swig_paddle.GradientMachine.createFromConfigProto(
+            conf.model_config)
         assert isinstance(self.network, swig_paddle.GradientMachine)
         self.network.loadParameters(self.model_dir)
 
@@ -112,14 +118,14 @@ class ImageClassifier():
         if self.oversample:
             image = image_util.resize_image(image, self.resize_dim)
             image = np.array(image)
-            input = np.zeros((1, image.shape[0], image.shape[1], 3),
-                             dtype=np.float32)
+            input = np.zeros(
+                (1, image.shape[0], image.shape[1], 3), dtype=np.float32)
             input[0] = image.astype(np.float32)
             input = image_util.oversample(input, self.crop_dims)
         else:
             image = image.resize(self.crop_dims, Image.ANTIALIAS)
-            input = np.zeros((1, self.crop_dims[0], self.crop_dims[1], 3),
-                             dtype=np.float32)
+            input = np.zeros(
+                (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32)
             input[0] = np.array(image).astype(np.float32)
 
         data_in = []
@@ -146,63 +152,75 @@ class ImageClassifier():
         res[name] = output[name].mean(0)
         return res
 
+
 def option_parser():
     usage = "%prog -c config -i data_list -w model_dir [options]"
     parser = OptionParser(usage="usage: %s" % usage)
-    parser.add_option("--job",
-                      action="store",
-                      dest="job_type",
-                      choices=['predict', 'extract',],
-                      default='predict',
-                      help="The job type. \
+    parser.add_option(
+        "--job",
+        action="store",
+        dest="job_type",
+        choices=[
+            'predict',
+            'extract',
+        ],
+        default='predict',
+        help="The job type. \
                       predict: predicting,\
                       extract: extract features")
-    parser.add_option("--conf",
-                      action="store",
-                      dest="train_conf",
-                      default='models/vgg.py',
-                      help="network config")
-    parser.add_option("--data",
-                      action="store",
-                      dest="data_file",
-                      default='image/dog.png',
-                      help="image list")
-    parser.add_option("--model",
-                      action="store",
-                      dest="model_path",
-                      default=None,
-                      help="model path")
-    parser.add_option("-c",
-                      dest="cpu_gpu",
-                      action="store_false",
-                      help="Use cpu mode.")
-    parser.add_option("-g",
-                      dest="cpu_gpu",
-                      default=True,
-                      action="store_true",
-                      help="Use gpu mode.")
-    parser.add_option("--mean",
-                      action="store",
-                      dest="mean",
-                      default='data/mean.meta',
-                      help="The mean file.")
-    parser.add_option("--multi_crop",
-                      action="store_true",
-                      dest="multi_crop",
-                      default=False,
-                      help="Wether to use multiple crops on image.")
+    parser.add_option(
+        "--conf",
+        action="store",
+        dest="train_conf",
+        default='models/vgg.py',
+        help="network config")
+    parser.add_option(
+        "--data",
+        action="store",
+        dest="data_file",
+        default='image/dog.png',
+        help="image list")
+    parser.add_option(
+        "--model",
+        action="store",
+        dest="model_path",
+        default=None,
+        help="model path")
+    parser.add_option(
+        "-c", dest="cpu_gpu", action="store_false", help="Use cpu mode.")
+    parser.add_option(
+        "-g",
+        dest="cpu_gpu",
+        default=True,
+        action="store_true",
+        help="Use gpu mode.")
+    parser.add_option(
+        "--mean",
+        action="store",
+        dest="mean",
+        default='data/mean.meta',
+        help="The mean file.")
+    parser.add_option(
+        "--multi_crop",
+        action="store_true",
+        dest="multi_crop",
+        default=False,
+        help="Whether to use multiple crops on image.")
     return parser.parse_args()
 
+
 def main():
     options, args = option_parser()
     mean = 'data/mean.meta' if not options.mean else options.mean
     conf = 'models/vgg.py' if not options.train_conf else options.train_conf
-    obj = ImageClassifier(conf,
-                          32,32,
-                          options.model_path,
-                          use_gpu=options.cpu_gpu,
-                          mean_file=mean,
-                          oversample=options.multi_crop)
+    obj = ImageClassifier(
+        conf,
+        32,
+        32,
+        options.model_path,
+        use_gpu=options.cpu_gpu,
+        mean_file=mean,
+        oversample=options.multi_crop)
     image_path = options.data_file
     if options.job_type == 'predict':
         output_layer = '__fc_layer_2__'
@@ -219,5 +237,6 @@ def main():
         fea = features[output_layer].reshape(dshape)
         vis_square(fea, 'fea_conv0.png')
 
+
 if __name__ == '__main__':
     main()
diff --git a/image_classification/dataprovider.py b/image_classification/dataprovider.py
index f9921bd025d19d8102b320b8075e1643ed4e18c2..17003d78a7fa112ca306ad549b3a6b78fafe79c4 100644
--- a/image_classification/dataprovider.py
+++ b/image_classification/dataprovider.py
@@ -16,6 +16,7 @@ import numpy as np
 import cPickle
 from paddle.trainer.PyDataProvider2 import *
 
+
 def initializer(settings, mean_path, is_train, **kwargs):
     settings.is_train = is_train
     settings.input_size = 3 * 32 * 32
@@ -37,7 +38,4 @@ def process(settings, file_list):
         labels = batch['labels']
         for im, lab in zip(images, labels):
             im = im - settings.mean
-            yield {
-                'image': im.astype('float32'),
-                'label': int(lab)
-            }
+            yield {'image': im.astype('float32'), 'label': int(lab)}
diff --git a/image_classification/models/resnet.py b/image_classification/models/resnet.py
index a621e2ad25598914bf65dc9b89ec1a6581d3bc73..e70f84f0f5d7fc4574419418ee6ed4150023aab5 100644
--- a/image_classification/models/resnet.py
+++ b/image_classification/models/resnet.py
@@ -59,6 +59,7 @@ def shortcut(ipt, n_in, n_out, stride):
     else:
         return ipt
 
+
 def basicblock(ipt, ch_out, stride):
     ch_in = ipt.num_filters
     tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1)
@@ -66,6 +67,7 @@ def basicblock(ipt, ch_out, stride):
     short = shortcut(ipt, ch_in, ch_out, stride)
     return addto_layer(input=[ipt, short], act=ReluActivation())
 
+
 def bottleneck(ipt, ch_out, stride):
     ch_in = ipt.num_filter
     tmp = conv_bn_layer(ipt, ch_out, 1, stride, 0)
@@ -74,55 +76,49 @@ def bottleneck(ipt, ch_out, stride):
     short = shortcut(ipt, ch_in, ch_out, stride)
     return addto_layer(input=[ipt, short], act=ReluActivation())
 
+
 def layer_warp(block_func, ipt, features, count, stride):
     tmp = block_func(ipt, features, stride)
     for i in range(1, count):
         tmp = block_func(tmp, features, 1)
     return tmp
 
+
 def resnet_imagenet(ipt, depth=50):
-    cfg = {18 : ([2,2,2,1], basicblock),
-           34 : ([3,4,6,3], basicblock),
-           50 : ([3,4,6,3], bottleneck),
-           101: ([3,4,23,3], bottleneck),
-           152: ([3,8,36,3], bottleneck)}
+    cfg = {
+        18: ([2, 2, 2, 1], basicblock),
+        34: ([3, 4, 6, 3], basicblock),
+        50: ([3, 4, 6, 3], bottleneck),
+        101: ([3, 4, 23, 3], bottleneck),
+        152: ([3, 8, 36, 3], bottleneck)
+    }
     stages, block_func = cfg[depth]
-    tmp = conv_bn_layer(ipt,
-                        ch_in=3,
-                        ch_out=64,
-                        filter_size=7,
-                        stride=2,
-                        padding=3)
+    tmp = conv_bn_layer(
+        ipt, ch_in=3, ch_out=64, filter_size=7, stride=2, padding=3)
     tmp = img_pool_layer(input=tmp, pool_size=3, stride=2)
-    tmp = layer_warp(block_func, tmp,  64, stages[0], 1)
+    tmp = layer_warp(block_func, tmp, 64, stages[0], 1)
     tmp = layer_warp(block_func, tmp, 128, stages[1], 2)
     tmp = layer_warp(block_func, tmp, 256, stages[2], 2)
     tmp = layer_warp(block_func, tmp, 512, stages[3], 2)
-    tmp = img_pool_layer(input=tmp,
-                         pool_size=7,
-                         stride=1,
-                         pool_type=AvgPooling())
+    tmp = img_pool_layer(
+        input=tmp, pool_size=7, stride=1, pool_type=AvgPooling())
     tmp = fc_layer(input=tmp, size=1000, act=SoftmaxActivation())
     return tmp
 
+
 def resnet_cifar10(ipt, depth=56):
-    assert((depth - 2) % 6 == 0, 'depth should be one of 20, 32, 44, 56, 110, 1202')
+    assert (depth - 2) % 6 == 0, \
+        'depth should be one of 20, 32, 44, 56, 110, 1202'
     n = (depth - 2) / 6
     nStages = {16, 64, 128}
-    tmp = conv_bn_layer(ipt,
-                        ch_in=3,
-                        ch_out=16,
-                        filter_size=3,
-                        stride=1,
-                        padding=1)
+    tmp = conv_bn_layer(
+        ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1)
     tmp = layer_warp(basicblock, tmp, 16, n, 1)
     tmp = layer_warp(basicblock, tmp, 32, n, 2)
     tmp = layer_warp(basicblock, tmp, 64, n, 2)
-    tmp = img_pool_layer(input=tmp,
-                         pool_size=8,
-                         stride=1,
-                         pool_type=AvgPooling())
+    tmp = img_pool_layer(
+        input=tmp, pool_size=8, stride=1, pool_type=AvgPooling())
     return tmp
diff --git a/image_classification/models/vgg.py b/image_classification/models/vgg.py
index cc05b676608411a4927d9eef782dd34a35ec9a7a..ebb14b5e16cbe44fe899281d29b13f5de42fffa1 100644
--- a/image_classification/models/vgg.py
+++ b/image_classification/models/vgg.py
@@ -30,7 +30,7 @@ settings(
     learning_rate_decay_b=50000 * 100,
     learning_rate_schedule='discexp',
     learning_method=MomentumOptimizer(0.9),
-    regularization=L2Regularization(0.0005 * 128),)
+    regularization=L2Regularization(0.0005 * 128), )
 
 
 def vgg_bn_drop(input):
@@ -54,19 +54,13 @@ def vgg_bn_drop(input):
     tmp = conv_block(tmp, 512, 3, [0.4, 0.4, 0])
 
     tmp = dropout_layer(input=tmp, dropout_rate=0.5)
-    tmp = fc_layer(
-        input=tmp,
-        size=512,
-        act=LinearActivation())
-    tmp = batch_norm_layer(input=tmp,
-                           act=ReluActivation(),
-                           layer_attr=ExtraAttr(drop_rate=0.5))
-    tmp = fc_layer(
-        input=tmp,
-        size=512,
-        act=LinearActivation())
+    tmp = fc_layer(input=tmp, size=512, act=LinearActivation())
+    tmp = batch_norm_layer(
+        input=tmp, act=ReluActivation(), layer_attr=ExtraAttr(drop_rate=0.5))
+    tmp = fc_layer(input=tmp, size=512, act=LinearActivation())
     return tmp
 
+
 datadim = 3 * 32 * 32
 classdim = 10
 data = data_layer(name='image', size=datadim)