Commit 65b20788 authored by dangqingqing

finish README.md and update code

Parent 3088707a
This diff is collapsed.
classify.py:

# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,44 +12,54 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import os, sys
import cPickle
import numpy as np
from PIL import Image
from optparse import OptionParser

import paddle.utils.image_util as image_util
from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import dense_vector
from paddle.trainer.config_parser import parse_config

import logging
logging.basicConfig(
    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
logging.getLogger().setLevel(logging.INFO)
def vis_square(data, fname):
    """Take an array of shape (n, height, width) or (n, height, width, 3)
       and visualize each (height, width) slice in a grid of size
       approx. sqrt(n) by sqrt(n)."""
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    # normalize data for display
    data = (data - data.min()) / (data.max() - data.min())
    # force the number of filters to be square
    n = int(np.ceil(np.sqrt(data.shape[0])))
    padding = (((0, n**2 - data.shape[0]), (0, 1), (0, 1))  # add some space between filters
               + ((0, 0), ) * (data.ndim - 3))  # don't pad the last dimension (if there is one)
    data = np.pad(data, padding, mode='constant', constant_values=1)  # pad with ones (white)
    # tile the filters into an image
    data = data.reshape((n, n) + data.shape[1:]).transpose(
        (0, 2, 1, 3) + tuple(range(4, data.ndim + 1)))
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])
    plt.imshow(data, cmap='gray')
    plt.axis('off')
    plt.savefig(fname)
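# A quick sanity check (illustrative names, not part of this commit):
#   vis_square(np.random.rand(64, 32, 32), 'grid_demo.png')
# tiles 64 random feature maps into a roughly 8 x 8 grid image.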
class ImageClassifier():
    def __init__(self,
                 train_conf,
                 resize_dim,
                 crop_dim,
                 model_dir=None,
                 use_gpu=True,
                 mean_file=None,
                 oversample=False,
                 is_color=True):
        """
        train_conf: network configure.
        model_dir: string, directory of model.
        resize_dim: int, resized image size.
        crop_dim: int, crop size.
        mean_file: string, image mean file.
        oversample: bool, oversample means multiple crops, namely five
                    patches (the four corner patches and the center
                    patch) as well as their horizontal reflections,
                    ten crops in all.
        """
        self.train_conf = train_conf
        self.model_dir = model_dir
        if model_dir is None:
@@ -60,47 +70,56 @@ class ImageClassifier():
        self.oversample = oversample
        self.is_color = is_color

        self.transformer = image_util.ImageTransformer(is_color=is_color)
        self.transformer.set_transpose((2, 0, 1))
        self.transformer.set_channel_swap((2, 1, 0))

        self.mean_file = mean_file
        if self.mean_file is not None:
            mean = np.load(self.mean_file)['mean']
            mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
            self.transformer.set_mean(mean)  # mean pixel
        else:
            # If no mean file is given, fall back to per-channel mean
            # values; these three values are calculated from ImageNet.
            self.transformer.set_mean(np.array([103.939, 116.779, 123.68]))

        conf_args = "use_gpu=%d,is_predict=1" % (int(use_gpu))
        conf = parse_config(train_conf, conf_args)
        swig_paddle.initPaddle("--use_gpu=%d" % (int(use_gpu)))
        self.network = swig_paddle.GradientMachine.createFromConfigProto(
            conf.model_config)
        assert isinstance(self.network, swig_paddle.GradientMachine)
        self.network.loadParameters(self.model_dir)

        dim = 3 * self.crop_dims[0] * self.crop_dims[1]
        slots = [dense_vector(dim)]
        self.converter = DataProviderConverter(slots)
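        # (DataProviderConverter turns the nested list built by get_data
        # into Paddle input Arguments; with oversampling, each of the ten
        # crops presumably becomes one row of the dense input batch.)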
    def get_data(self, img_path):
        """
        1. load image from img_path.
        2. resize or oversampling.
        3. transformer data: transpose, channel swap, sub mean.
        return K x H x W ndarray.

        img_path: image path.
        """
        image = image_util.load_image(img_path, self.is_color)
        # An alternative way to get oversampled features is to crop and
        # average over a large feature map computed from a larger input
        # image; that approach reduces the computation.
        if self.oversample:
            # image_util.resize_image: short side is self.resize_dim
            image = image_util.resize_image(image, self.resize_dim)
            image = np.array(image)
            input = np.zeros((1, image.shape[0], image.shape[1], 3),
                             dtype=np.float32)
            input[0] = image.astype(np.float32)
            input = image_util.oversample(input, self.crop_dims)
        else:
            image = image.resize(self.crop_dims, Image.ANTIALIAS)
            input = np.zeros((1, self.crop_dims[0], self.crop_dims[1], 3),
                             dtype=np.float32)
            input[0] = np.array(image).astype(np.float32)

        data_in = []
@@ -114,46 +133,91 @@ class ImageClassifier():
        return self.network.forwardTest(in_arg)

    def forward(self, data, output_layer):
        """
        input_data: py_paddle input data.
        output_layer: specify the name of probability, namely the layer with
                      softmax activation.
        return: the predicting probability of each label.
        """
        input = self.converter(data)
        self.network.forwardTest(input)
        output = self.network.getLayerOutputs(output_layer)
        res = {}
        if isinstance(output_layer, basestring):
            output_layer = [output_layer]
        for name in output_layer:
            # For oversampling, average predictions across crops.
            # If not, the shape of output[name] is (1, class_number);
            # the mean is still applicable.
            res[name] = output[name].mean(0)
        return res
def option_parser():
    usage = "%prog -c config -i data_list -w model_dir [options]"
    parser = OptionParser(usage="usage: %s" % usage)
    parser.add_option("--job",
                      action="store",
                      dest="job_type",
                      choices=['predict', 'extract'],
                      default='predict',
                      help="The job type. \
                            predict: predicting, \
                            extract: extract features")
    parser.add_option("--conf",
                      action="store",
                      dest="train_conf",
                      default='models/vgg.py',
                      help="network config")
    parser.add_option("--data",
                      action="store",
                      dest="data_file",
                      default='image/dog.png',
                      help="image list")
    parser.add_option("--model",
                      action="store",
                      dest="model_path",
                      default=None,
                      help="model path")
    parser.add_option("-c",
                      dest="cpu_gpu",
                      action="store_false",
                      help="Use cpu mode.")
    parser.add_option("-g",
                      dest="cpu_gpu",
                      default=True,
                      action="store_true",
                      help="Use gpu mode.")
    parser.add_option("--mean",
                      action="store",
                      dest="mean",
                      default='data/mean.meta',
                      help="The mean file.")
    parser.add_option("--multi_crop",
                      action="store_true",
                      dest="multi_crop",
                      default=False,
                      help="Whether to use multiple crops on image.")
    return parser.parse_args()
def main():
    options, args = option_parser()
    mean = 'data/mean.meta' if not options.mean else options.mean
    conf = 'models/vgg.py' if not options.train_conf else options.train_conf
    obj = ImageClassifier(conf,
                          32, 32,
                          options.model_path,
                          use_gpu=options.cpu_gpu,
                          mean_file=mean,
                          oversample=options.multi_crop)
    image_path = options.data_file
    if options.job_type == 'predict':
        output_layer = '__fc_layer_2__'
        data = obj.get_data(image_path)
        prob = obj.forward(data, output_layer)
        lab = np.argsort(-prob[output_layer])
        logging.info("Label of %s is: %d", image_path, lab[0])
    elif options.job_type == "extract":
        output_layer = '__conv_0__'
        data = obj.get_data(options.data_file)
        features = obj.forward(data, output_layer)
        dshape = (64, 32, 32)
        fea = features[output_layer].reshape(dshape)
        vis_square(fea, 'fea_conv0.png')
if __name__ == '__main__':
    main()
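For reference, the same flow can be driven from a Python shell. A minimal
sketch, assuming the file above is saved as classify.py (as the shell
scripts below suggest) and a model has been trained into output/pass-00299:

    from classify import ImageClassifier

    clf = ImageClassifier('models/vgg.py', 32, 32, 'output/pass-00299',
                          use_gpu=False, mean_file='data/mean.meta')
    probs = clf.forward(clf.get_data('image/dog.png'), '__fc_layer_2__')
    print probs['__fc_layer_2__'].argmax()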
dataprovider.py:

@@ -14,7 +14,6 @@
import numpy as np
import cPickle
from paddle.trainer.PyDataProvider2 import *

def initializer(settings, mean_path, is_train, **kwargs):
    ...
extract.sh:

#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e

# Append -c to run in CPU mode.
python classify.py --job=extract --model=output/pass-00299 --data=image/dog.png
image_classification/image/resnet.png: binary image changed (343.4 KB → 346.4 KB).
models/resnet.py:

@@ -14,13 +14,33 @@
from paddle.trainer_config_helpers import *

is_predict = get_config_arg("is_predict", bool, False)

if not is_predict:
    args = {'meta': 'data/mean.meta'}
    define_py_data_sources2(
        train_list='data/train.list',
        test_list='data/test.list',
        module='dataprovider',
        obj='process',
        args=args)

settings(
    batch_size=128,
    learning_rate=0.1 / 128.0,
    learning_rate_decay_a=0.1,
    learning_rate_decay_b=50000 * 100,
    learning_rate_schedule='discexp',
    learning_method=MomentumOptimizer(0.9),
    regularization=L2Regularization(0.0001 * 128))
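# An assumption based on Paddle v1's documented 'discexp' (discrete
# exponential) schedule:
#   lr = learning_rate * decay_a ** floor(num_samples_processed / decay_b)
# With decay_b = 50000 * 100, the rate drops by 10x roughly every 100
# passes over the 50000-image CIFAR-10 training set.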
def conv_bn_layer(input,
                  ch_out,
                  filter_size,
                  stride,
                  padding,
                  active_type=ReluActivation(),
                  ch_in=None):
    tmp = img_conv_layer(
        input=input,
        filter_size=filter_size,
@@ -35,16 +55,16 @@ def conv_bn_layer(input,

def shortcut(ipt, n_in, n_out, stride):
    if n_in != n_out:
        return conv_bn_layer(ipt, n_out, 1, stride, 0, LinearActivation())
    else:
        return ipt

def basicblock(ipt, ch_out, stride):
    ch_in = ipt.num_filters
    tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1)
    tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, LinearActivation())
    short = shortcut(ipt, ch_in, ch_out, stride)
    return addto_layer(input=[ipt, short], act=ReluActivation())
def bottleneck(ipt, ch_out, stride):
    ch_in = ipt.num_filters
@@ -52,13 +72,13 @@ def bottleneck(ipt, ch_out, stride):
    tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1)
    tmp = conv_bn_layer(tmp, ch_out * 4, 1, 1, 0, LinearActivation())
    short = shortcut(ipt, ch_in, ch_out, stride)
    return addto_layer(input=[ipt, short], act=ReluActivation())

def layer_warp(block_func, ipt, features, count, stride):
    tmp = block_func(ipt, features, stride)
    for i in range(1, count):
        tmp = block_func(tmp, features, 1)
    return tmp
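# For example, layer_warp(basicblock, tmp, 16, 9, 1) stacks nine 16-filter
# residual blocks; only the first block of a stage gets the given stride,
# so each stage downsamples at most once.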
def resnet_imagenet(ipt, depth=50):
    cfg = {18: ([2, 2, 2, 2], basicblock),
@@ -96,42 +116,23 @@ def resnet_cifar10(ipt, depth=56):
        filter_size=3,
        stride=1,
        padding=1)
    tmp = layer_warp(basicblock, tmp, 16, n, 1)
    tmp = layer_warp(basicblock, tmp, 32, n, 2)
    tmp = layer_warp(basicblock, tmp, 64, n, 2)
    tmp = img_pool_layer(input=tmp,
                         pool_size=8,
                         stride=1,
                         pool_type=AvgPooling())
    return tmp
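# resnet_cifar10 follows the depth = 6n + 2 rule of the original ResNet
# paper; presumably the elided body sets n = (depth - 2) / 6, so depth=56
# gives n = 9 basic blocks per stage.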
datadim = 3 * 32 * 32
classdim = 10
data = data_layer(name='image', size=datadim)
net = resnet_cifar10(data, depth=56)
out = fc_layer(input=net, size=classdim, act=SoftmaxActivation())

if not is_predict:
    lbl = data_layer(name="label", size=classdim)
    outputs(classification_cost(input=out, label=lbl))
else:
    outputs(out)
models/vgg.py:

@@ -14,11 +14,30 @@
from paddle.trainer_config_helpers import *

is_predict = get_config_arg("is_predict", bool, False)

if not is_predict:
    define_py_data_sources2(
        train_list='data/train.list',
        test_list='data/test.list',
        module='dataprovider',
        obj='process',
        args={'mean_path': 'data/mean.meta'})

settings(
    batch_size=128,
    learning_rate=0.1 / 128.0,
    learning_rate_decay_a=0.1,
    learning_rate_decay_b=50000 * 100,
    learning_rate_schedule='discexp',
    learning_method=MomentumOptimizer(0.9),
    regularization=L2Regularization(0.0005 * 128))
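# The 0.1 / 128.0 learning rate and 0.0005 * 128 weight decay look like
# per-sample values paired with batch_size=128, i.e. effectively lr=0.1 and
# L2 decay 0.0005 per batch (an assumption about Paddle v1 semantics).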
def vgg_bn_drop(input):
    def conv_block(ipt, num_filter, groups, dropouts, num_channels=None):
        return img_conv_group(
            input=ipt,
            num_channels=num_channels,
            pool_size=2,
            pool_stride=2,
            conv_num_filter=[num_filter] * groups,
@@ -28,7 +47,7 @@ def vgg_bn_drop(input):
            conv_batchnorm_drop_rate=dropouts,
            pool_type=MaxPooling())

    tmp = conv_block(input, 64, 2, [0.3, 0], 3)
    tmp = conv_block(tmp, 128, 2, [0.4, 0])
    tmp = conv_block(tmp, 256, 3, [0.4, 0.4, 0])
    tmp = conv_block(tmp, 512, 3, [0.4, 0.4, 0])
@@ -46,33 +65,16 @@ def vgg_bn_drop(input):
        input=tmp,
        size=512,
        act=LinearActivation())
    return tmp
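# Assuming the elided hunk holds the fifth conv_block (512 filters, 3
# convolutions), the groups 2+2+3+3+3 reproduce the 13-convolution stack
# of the VGG-16 design, here followed by fully-connected layers.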
datadim = 3 * 32 * 32
classdim = 10
data = data_layer(name='image', size=datadim)
net = vgg_bn_drop(data)
out = fc_layer(input=net, size=classdim, act=SoftmaxActivation())

if not is_predict:
    lbl = data_layer(name="label", size=classdim)
    cost = classification_cost(input=out, label=lbl)
    outputs(cost)
else:
    outputs(out)
predict.sh:

@@ -14,7 +14,4 @@
# limitations under the License.
set -e

# Append -c to run in CPU mode.
python classify.py --job=predict --model=output/pass-00299 --data=image/dog.png
train.sh:

@@ -14,9 +14,9 @@
# limitations under the License.
set -e

config=models/resnet.py
#config=models/vgg.py
output=output
log=train.log

paddle train \
@@ -26,4 +26,4 @@ paddle train \
--log_period=100 \
--num_passes=300 \
--save_dir=$output
#2>&1 | tee $log