Commit 81887b2a authored by Tao Luo

remove legacy v2 code in python/paddle/utils

Parent ace33d7f
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.config_parser import parse_config
from paddle.proto import TrainerConfig_pb2
import sys
__all__ = []
if __name__ == '__main__':
whole_conf = False
binary = False
if len(sys.argv) == 2:
conf = parse_config(sys.argv[1], '')
elif len(sys.argv) == 3:
conf = parse_config(sys.argv[1], sys.argv[2])
elif len(sys.argv) == 4:
conf = parse_config(sys.argv[1], sys.argv[2])
if sys.argv[3] == '--whole':
whole_conf = True
elif sys.argv[3] == '--binary':
binary = True
        else:
            raise RuntimeError("Unknown option: %s" % sys.argv[3])
    else:
        raise RuntimeError("Expected 1 to 3 command-line arguments, got %d." %
                           (len(sys.argv) - 1))
assert isinstance(conf, TrainerConfig_pb2.TrainerConfig)
if whole_conf:
print(conf)
else:
if binary:
            # SerializeToString() returns bytes; use the binary stdout buffer
            # when it exists (Python 3) so raw bytes are written unmangled.
            out = getattr(sys.stdout, 'buffer', sys.stdout)
            out.write(conf.model_config.SerializeToString())
else:
print(conf.model_config)
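
# A minimal usage sketch for this dump script (the script and config file
# names below are illustrative, not taken from this commit):
#
#   python dump_config.py trainer_config.py                  # model config only
#   python dump_config.py trainer_config.py 'arg=val'        # with config args
#   python dump_config.py trainer_config.py '' --whole       # whole TrainerConfig
#   python dump_config.py trainer_config.py '' --binary > model.bin
#
# With --binary, the serialized ModelConfig is written to stdout, so it is
# usually redirected to a file.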
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
from paddle.trainer_config_helpers.layers import LayerOutput
from paddle.v2.layer import parse_network
from paddle.proto import TrainerConfig_pb2
__all__ = ["dump_v2_config"]
def dump_v2_config(topology, save_path, binary=False):
""" Dump the network topology to a specified file.
    This function is only used to dump networks defined with the PaddlePaddle
    V2 APIs. It will NOT dump configurations related to the PaddlePaddle
    optimizer.
    :param topology: The output layer(s) of the entire network (more than one
                     layer can be given in a Python list or tuple). Using the
                     specified layers as roots and traversing back to the data
                     layer(s), all the layers connected to the specified output
                     layers will be dumped; layers not connected to them will
                     not be dumped.
    :type topology: LayerOutput|List|Tuple
:param save_path: The path to save the dumped network topology.
:type save_path: str
    :param binary: Whether to dump the serialized network topology. The
                   default value is False. NOTE: the PaddlePaddle C-API
                   requires a serialized topology, so this flag MUST be set
                   to True when generating a topology for the C-API.
:type binary: bool
"""
if isinstance(topology, LayerOutput):
topology = [topology]
elif isinstance(topology, collections.Sequence):
for out_layer in topology:
assert isinstance(out_layer, LayerOutput), (
"The type of each element in the parameter topology "
"should be LayerOutput.")
    else:
        raise RuntimeError("Invalid input type for parameter topology.")
    model_str = parse_network(topology)
    # Open in binary mode when dumping the serialized protobuf.
    with open(save_path, "wb" if binary else "w") as fout:
        if binary:
            fout.write(model_str.SerializeToString())
else:
fout.write(str(model_str))
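
# A minimal usage sketch for dump_v2_config, assuming a network defined with
# the PaddlePaddle V2 API (the layer configuration below is illustrative):
#
#   import paddle.v2 as paddle
#
#   img = paddle.layer.data(
#       name='image', type=paddle.data_type.dense_vector(784))
#   prediction = paddle.layer.fc(
#       input=img, size=10, act=paddle.activation.Softmax())
#
#   dump_v2_config(prediction, 'topology.txt')               # readable text
#   dump_v2_config(prediction, 'topology.bin', binary=True)  # for the C-API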
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os, sys
import numpy as np
from PIL import Image
import six
from six.moves import cStringIO as StringIO
import multiprocessing
import functools
import itertools
from paddle.utils.image_util import *
from paddle.trainer.config_parser import logger
try:
import cv2
except ImportError:
    logger.warning("OpenCV is not installed, falling back to PIL for image "
                   "processing.")
cv2 = None
__all__ = ["CvTransformer", "PILTransformer", "MultiProcessImageTransformer"]
class CvTransformer(ImageTransformer):
"""
    CvTransformer uses python-opencv (cv2) to process images.
"""
def __init__(
self,
min_size=None,
crop_size=None,
transpose=(2, 0, 1), # transpose to C * H * W
channel_swap=None,
mean=None,
is_train=True,
is_color=True):
ImageTransformer.__init__(self, transpose, channel_swap, mean, is_color)
self.min_size = min_size
self.crop_size = crop_size
self.is_train = is_train
def resize(self, im, min_size):
row, col = im.shape[:2]
new_row, new_col = min_size, min_size
if row > col:
            new_row = min_size * row // col
        else:
            new_col = min_size * col // row
        # cv2.resize expects the target size as (width, height).
        im = cv2.resize(im, (new_col, new_row), interpolation=cv2.INTER_CUBIC)
return im
def crop_and_flip(self, im):
"""
        Return the cropped (and possibly flipped) image.
        The size of the cropped image is crop_size * crop_size.
        im: an (H x W x K) ndarray
"""
row, col = im.shape[:2]
start_h, start_w = 0, 0
if self.is_train:
start_h = np.random.randint(0, row - self.crop_size + 1)
start_w = np.random.randint(0, col - self.crop_size + 1)
else:
            start_h = (row - self.crop_size) // 2
            start_w = (col - self.crop_size) // 2
end_h, end_w = start_h + self.crop_size, start_w + self.crop_size
if self.is_color:
im = im[start_h:end_h, start_w:end_w, :]
else:
im = im[start_h:end_h, start_w:end_w]
if (self.is_train) and (np.random.randint(2) == 0):
if self.is_color:
im = im[:, ::-1, :]
else:
im = im[:, ::-1]
return im
def transform(self, im):
im = self.resize(im, self.min_size)
im = self.crop_and_flip(im)
# transpose, swap channel, sub mean
im = im.astype('float32')
ImageTransformer.transformer(self, im)
return im
def load_image_from_string(self, data):
        # cv2.CV_LOAD_IMAGE_* was removed in OpenCV 3; the cv2.IMREAD_* flags
        # work in both OpenCV 2.4 and later releases.
        flag = cv2.IMREAD_COLOR if self.is_color else cv2.IMREAD_GRAYSCALE
        im = cv2.imdecode(np.frombuffer(data, np.uint8), flag)
return im
def transform_from_string(self, data):
im = self.load_image_from_string(data)
return self.transform(im)
def load_image_from_file(self, file):
        flag = cv2.IMREAD_COLOR if self.is_color else cv2.IMREAD_GRAYSCALE
im = cv2.imread(file, flag)
return im
def transform_from_file(self, file):
im = self.load_image_from_file(file)
return self.transform(im)
class PILTransformer(ImageTransformer):
"""
    PILTransformer uses PIL to process images.
"""
def __init__(
self,
min_size=None,
crop_size=None,
transpose=(2, 0, 1), # transpose to C * H * W
channel_swap=None,
mean=None,
is_train=True,
is_color=True):
ImageTransformer.__init__(self, transpose, channel_swap, mean, is_color)
self.min_size = min_size
self.crop_size = crop_size
self.is_train = is_train
    def resize(self, im, min_size):
        # PIL's im.size is (width, height).
        width, height = im.size[:2]
        new_width, new_height = min_size, min_size
        if width > height:
            new_width = min_size * width // height
        else:
            new_height = min_size * height // width
        im = im.resize((new_width, new_height), Image.ANTIALIAS)
return im
def crop_and_flip(self, im):
"""
        Return the cropped (and possibly flipped) image.
        The size of the cropped image is crop_size * crop_size.
"""
        # PIL's im.size is (width, height); the box given to im.crop() is
        # (left, upper, right, lower).
        width, height = im.size[:2]
        start_x, start_y = 0, 0
        if self.is_train:
            start_x = np.random.randint(0, width - self.crop_size + 1)
            start_y = np.random.randint(0, height - self.crop_size + 1)
        else:
            start_x = (width - self.crop_size) // 2
            start_y = (height - self.crop_size) // 2
        end_x, end_y = start_x + self.crop_size, start_y + self.crop_size
        im = im.crop((start_x, start_y, end_x, end_y))
if (self.is_train) and (np.random.randint(2) == 0):
im = im.transpose(Image.FLIP_LEFT_RIGHT)
return im
def transform(self, im):
im = self.resize(im, self.min_size)
im = self.crop_and_flip(im)
im = np.array(im, dtype=np.float32) # convert to numpy.array
# transpose, swap channel, sub mean
ImageTransformer.transformer(self, im)
return im
def load_image_from_string(self, data):
im = Image.open(StringIO(data))
return im
def transform_from_string(self, data):
im = self.load_image_from_string(data)
return self.transform(im)
def load_image_from_file(self, file):
im = Image.open(file)
return im
def transform_from_file(self, file):
im = self.load_image_from_file(file)
return self.transform(im)
def job(is_img_string, transformer, data_label_pack):
(data, label) = data_label_pack
if is_img_string:
return transformer.transform_from_string(data), label
else:
return transformer.transform_from_file(data), label
class MultiProcessImageTransformer(object):
def __init__(self,
procnum=10,
resize_size=None,
crop_size=None,
transpose=(2, 0, 1),
channel_swap=None,
mean=None,
is_train=True,
is_color=True,
is_img_string=True):
"""
        Process images with multiple processes. If it is used in a
        PyDataProvider, simple usage for a CNN is as follows:
.. code-block:: python
            def hook(settings, is_train, **kwargs):
settings.is_train = is_train
settings.mean_value = np.array([103.939,116.779,123.68], dtype=np.float32)
settings.input_types = [
dense_vector(3 * 224 * 224),
integer_value(1)]
settings.transformer = MultiProcessImageTransformer(
procnum=10,
resize_size=256,
crop_size=224,
transpose=(2, 0, 1),
                    mean=settings.mean_value,
is_train=settings.is_train)
@provider(init_hook=hook, pool_size=20480)
def process(settings, file_list):
with open(file_list, 'r') as fdata:
for line in fdata:
data_dic = np.load(line.strip()) # load the data batch pickled by Pickle.
data = data_dic['data']
labels = data_dic['label']
labels = np.array(labels, dtype=np.float32)
for im, lab in settings.dp.run(data, labels):
yield [im.astype('float32'), int(lab)]
        :param procnum: the number of worker processes.
        :type procnum: int
        :param resize_size: the shorter-edge size of the image after resizing.
        :type resize_size: int
        :param crop_size: the cropping size.
        :type crop_size: int
        :param transpose: the transpose order; Paddle only allows the
                          C * H * W order.
        :type transpose: tuple or list
        :param channel_swap: the channel swap order, e.g. RGB to BGR.
        :type channel_swap: tuple or list
        :param mean: the mean values of the image, either a per-channel mean
                     or an element-wise mean.
        :type mean: ndarray; 1-D for a per-channel mean, 3-D for an
                    element-wise mean.
        :param is_train: whether this is the training phase or the testing
                         phase.
        :type is_train: bool
        :param is_color: whether the image is color or gray.
        :type is_color: bool
        :param is_img_string: whether the input is a raw image string rather
                              than an image file name.
        :type is_img_string: bool
"""
self.procnum = procnum
self.pool = multiprocessing.Pool(procnum)
self.is_img_string = is_img_string
if cv2 is not None:
self.transformer = CvTransformer(resize_size, crop_size, transpose,
channel_swap, mean, is_train,
is_color)
else:
self.transformer = PILTransformer(resize_size, crop_size, transpose,
channel_swap, mean, is_train,
is_color)
def run(self, data, label):
fun = functools.partial(job, self.is_img_string, self.transformer)
return self.pool.imap_unordered(
fun, six.moves.zip(data, label), chunksize=100 * self.procnum)
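
# A standalone usage sketch for MultiProcessImageTransformer, outside of a
# PyDataProvider (the file names and mean values are illustrative):
#
#   transformer = MultiProcessImageTransformer(
#       procnum=4,
#       resize_size=256,
#       crop_size=224,
#       mean=np.array([103.939, 116.779, 123.68], dtype=np.float32),
#       is_train=True,
#       is_img_string=False)  # inputs are file names, not raw image strings
#   files = ['img_0.jpg', 'img_1.jpg']
#   labels = [0, 1]
#   for im, label in transformer.run(files, labels):
#       # im is a transformed C * H * W float32 array; the order of results
#       # is not guaranteed because imap_unordered is used.
#       pass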
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Generate dot diagram file for the given paddle model config
# The generated file can be viewed using Graphviz (http://graphviz.org)
from __future__ import print_function
import six
import sys
import traceback
from paddle.trainer.config_parser import parse_config
def make_layer_label(layer_config):
label = '%s type=%s' % (layer_config.name, layer_config.type)
if layer_config.reversed:
label += ' <=='
label2 = ''
if layer_config.active_type:
label2 += 'act=%s ' % layer_config.active_type
if layer_config.bias_parameter_name:
label2 += 'bias=%s ' % layer_config.bias_parameter_name
if label2:
label += '\l' + label2
return label
def make_diagram(config_file, dot_file, config_arg_str):
config = parse_config(config_file, config_arg_str)
make_diagram_from_proto(config.model_config, dot_file)
def make_diagram_from_proto(model_config, dot_file):
# print >> sys.stderr, config
name2id = {}
f = open(dot_file, 'w')
submodel_layers = set()
def make_link(link):
return 'l%s -> l%s;' % (name2id[link.layer_name],
name2id[link.link_name])
def make_mem(mem):
s = ''
if mem.boot_layer_name:
s += 'l%s -> l%s;\n' % (name2id[mem.boot_layer_name],
name2id[mem.layer_name])
s += 'l%s -> l%s [style=dashed];' % (name2id[mem.layer_name],
name2id[mem.link_name])
return s
print('digraph graphname {', file=f)
print('node [width=0.375,height=0.25];', file=f)
for i in six.moves.xrange(len(model_config.layers)):
l = model_config.layers[i]
name2id[l.name] = i
i = 0
for sub_model in model_config.sub_models:
if sub_model.name == 'root':
continue
print('subgraph cluster_%s {' % i, file=f)
print('style=dashed;', file=f)
label = '%s ' % sub_model.name
if sub_model.reversed:
label += '<=='
print('label = "%s";' % label, file=f)
i += 1
submodel_layers.add(sub_model.name)
for layer_name in sub_model.layer_names:
submodel_layers.add(layer_name)
lid = name2id[layer_name]
layer_config = model_config.layers[lid]
label = make_layer_label(layer_config)
print('l%s [label="%s", shape=box];' % (lid, label), file=f)
print('}', file=f)
for i in six.moves.xrange(len(model_config.layers)):
l = model_config.layers[i]
if l.name not in submodel_layers:
label = make_layer_label(l)
print('l%s [label="%s", shape=box];' % (i, label), file=f)
for sub_model in model_config.sub_models:
if sub_model.name == 'root':
continue
for link in sub_model.in_links:
print(make_link(link), file=f)
for link in sub_model.out_links:
print(make_link(link), file=f)
for mem in sub_model.memories:
print(make_mem(mem), file=f)
for i in six.moves.xrange(len(model_config.layers)):
for l in model_config.layers[i].inputs:
print(
'l%s -> l%s [label="%s"];' % (name2id[l.input_layer_name], i,
l.input_parameter_name),
file=f)
print('}', file=f)
f.close()
def usage():
    print(
        "Usage: python %s CONFIG_FILE DOT_FILE [config_str]" % sys.argv[0],
        file=sys.stderr)
exit(1)
if __name__ == '__main__':
if len(sys.argv) < 3 or len(sys.argv) > 4:
usage()
config_file = sys.argv[1]
dot_file = sys.argv[2]
config_arg_str = sys.argv[3] if len(sys.argv) == 4 else ''
try:
make_diagram(config_file, dot_file, config_arg_str)
except:
traceback.print_exc()
raise
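
# A usage sketch (the script and config file names are illustrative):
# generate the .dot file with this script, then render it with Graphviz:
#
#   python make_model_diagram.py trainer_config.py model.dot
#   dot -Tpng model.dot -o model.png   # requires Graphviz (http://graphviz.org)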
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import gzip
import struct
import os
from paddle.trainer_config_helpers.layers import LayerOutput
from paddle.v2.parameters import Parameters
from paddle.proto import ModelConfig_pb2
from paddle.v2.topology import Topology
def merge_v2_model(net, param_file, output_file):
'''Merge the model config and parameters into one file.
    The model configuration file (ending with .py) describes the model
    structure. The parameters file (ending with .tar.gz) stores the
    parameters of the model.
@param net The output layer of the network for inference.
@param param_file Path of the parameters (.tar.gz) which is stored by
v2 api.
@param output_file Path of the merged file which will be generated.
Usage:
from paddle.utils.merge_model import merge_v2_model
# import your network configuration
from example_net import net_conf
net = net_conf(is_predict=True)
param_file = './param_pass_00000.tar.gz'
output_file = './output.paddle'
merge_v2_model(net, param_file, output_file)
'''
assert isinstance(net, LayerOutput), \
"The net should be the output of the network for inference"
    assert os.path.exists(param_file), \
        "The model parameters file %s does not exist." % param_file
model_proto = Topology(net).proto()
assert isinstance(model_proto, ModelConfig_pb2.ModelConfig)
with gzip.open(param_file) as f:
params = Parameters.from_tar(f)
if os.path.exists(output_file):
os.remove(output_file)
    # Open in binary mode: the file holds struct-packed and protobuf bytes.
    with open(output_file, 'wb') as f:
param_names = [param.name for param in model_proto.parameters]
conf_str = model_proto.SerializeToString()
f.write(struct.pack('q', len(conf_str)))
f.write(conf_str)
for pname in param_names:
params.serialize(pname, f)
    print('Generated %s successfully!' % output_file)
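
# A sketch of how a consumer could read the merged file back, mirroring the
# write format above (a native-endian 8-byte length, the serialized
# ModelConfig, then the parameter blocks in the order listed in the config):
#
#   with open(output_file, 'rb') as f:
#       conf_len, = struct.unpack('q', f.read(8))
#       model_proto = ModelConfig_pb2.ModelConfig.FromString(f.read(conf_len))
#       # The remaining bytes hold the serialized parameters, one block per
#       # entry in model_proto.parameters.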
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import six
import os
from paddle.trainer.config_parser import *
from paddle.utils.preprocess_img import \
ImageClassificationDatasetCreater
from paddle.trainer_config_helpers import *
def image_data(data_dir,
processed_image_size,
overwrite=False,
color=True,
train_list="batches/train.list",
test_list="batches/test.list",
meta_file="batches/batches.meta",
use_jpeg=1):
"""
Predefined image data provider for image classification.
train_list: a text file containing a list of training batches.
test_list: a text file containing a list of test batches.
    processed_image_size: all the input images will be resized to this size.
        If an image is not square, its shorter edge will be resized to this
        size and the aspect ratio will be kept.
    color: whether the images are color or gray.
    meta_file: the path of the meta file that stores the mean image and
        other dataset information, such as the size of the images,
        the size of the mean image, and the number of classes.
    use_jpeg: whether the preprocessed images are stored as JPEG strings
        (passed through to the data provider).
"""
data_creator = ImageClassificationDatasetCreater(
data_dir, processed_image_size, color)
batch_data_dir = data_dir
train_list = os.path.join(batch_data_dir, train_list)
test_list = os.path.join(batch_data_dir, test_list)
meta_path = os.path.join(batch_data_dir, meta_file)
image_size = processed_image_size
conf = np.load(meta_path)
mean_image_size = conf["mean_image_size"]
is_color = conf["color"]
num_classes = conf["num_classes"]
color_string = "color" if is_color else "gray"
args = {
'meta': meta_path,
'mean_img_size': mean_image_size,
'img_size': image_size,
'num_classes': num_classes,
'use_jpeg': use_jpeg != 0,
'color': color_string
}
define_py_data_sources2(
train_list,
test_list,
module='image_provider',
obj='processData',
args=args)
return {
"image_size": image_size,
"num_classes": num_classes,
"is_color": is_color
}
def get_extra_layer_attr(drop_rate):
if drop_rate == 0:
return None
else:
return ExtraLayerAttribute(drop_rate=drop_rate)
def image_data_layers(image_size, num_classes, is_color=False,
is_predict=False):
"""
Data layers for image classification.
image_size: image size.
num_classes: num of classes.
is_color: whether the input images are color.
is_predict: whether the network is used for prediction.
"""
num_image_channels = 3 if is_color else 1
data_input = data_layer("input",
image_size * image_size * num_image_channels)
if is_predict:
return data_input, None, num_image_channels
else:
label_input = data_layer("label", 1)
return data_input, label_input, num_image_channels
def simple_conv_net(data_conf, is_color=False, is_predict=False):
"""
    A wrapper for a simple network for MNIST digit recognition.
    It contains two convolutional layers, one fully connected layer, and
    one softmax layer.
data_conf is a dictionary with the following keys:
image_size: image size.
num_classes: num of classes.
is_color: whether the input images are color.
"""
for k, v in six.iteritems(data_conf):
globals()[k] = v
data_input, label_input, num_image_channels = \
image_data_layers(image_size, num_classes, is_color, is_predict)
filter_sizes = [5, 5]
num_channels = [32, 64]
strides = [1, 1]
fc_dims = [500]
conv_bn_pool1 = img_conv_bn_pool(
name="g1",
input=data_input,
filter_size=filter_sizes[0],
num_channel=num_image_channels,
num_filters=num_channels[0],
conv_stride=1,
conv_padding=0,
pool_size=3,
pool_stride=2,
act=ReluActivation())
conv_bn_pool2 = img_conv_bn_pool(
name="g2",
input=conv_bn_pool1,
filter_size=filter_sizes[1],
num_channel=num_channels[0],
num_filters=num_channels[1],
conv_stride=1,
conv_padding=0,
pool_size=3,
pool_stride=2,
act=ReluActivation())
    fc3 = fc_layer(
        name="fc3", input=conv_bn_pool2, size=fc_dims[0], act=ReluActivation())
fc3_dropped = dropout_layer(name="fc3_dropped", input=fc3, dropout_rate=0.5)
    output = fc_layer(
        name="output",
        input=fc3_dropped,
        size=num_classes,
        act=SoftmaxActivation())
    if is_predict:
        outputs(output)
    else:
        cost = classification_cost(
            name="cost", input=output, label=label_input)
        outputs(cost)
def conv_layer_group(prefix_num,
num_layers,
input,
input_channels,
output_channels,
drop_rates=[],
strides=[],
with_bn=[]):
"""
    A group of convolution layers and batch normalization layers,
    followed by one pooling layer.
    It is utilized in the VGG network for image classification.
    prefix_num: the prefix number of the layer names.
        For example, if prefix_num = 1, the first convolutional layer's
        name will be conv1_1.
num_layers: number of the convolutional layers.
input: the name of the input layer.
input_channels: the number of channels of the input feature map.
output_channels: the number of channels of the output feature map.
drop_rates: the drop rates of the BN layers. It will be all zero by default.
strides: the stride of the convolution for the layers.
It will be all 1 by default.
with_bn: whether to use Batch Normalization for Conv layers.
By default, it is all false.
"""
if len(drop_rates) == 0: drop_rates = [0] * num_layers
if len(strides) == 0: strides = [1] * num_layers
if len(with_bn) == 0: with_bn = [False] * num_layers
    assert (len(drop_rates) == num_layers)
    assert (len(strides) == num_layers)
    assert (len(with_bn) == num_layers)
for i in range(1, num_layers + 1):
if i == 1:
i_conv_in = input
else:
i_conv_in = group_output
i_channels_conv = input_channels if i == 1 else output_channels
conv_act = LinearActivation() if with_bn[i - 1] else ReluActivation()
conv_output = img_conv_layer(
name="conv%d_%d" % (prefix_num, i),
input=i_conv_in,
filter_size=3,
num_channels=i_channels_conv,
num_filters=output_channels,
stride=strides[i - 1],
padding=1,
act=conv_act)
if with_bn[i - 1]:
bn = batch_norm_layer(
name="conv%d_%d_bn" % (prefix_num, i),
input=conv_output,
num_channels=output_channels,
act=ReluActivation(),
layer_attr=get_extra_layer_attr(drop_rate=drop_rates[i - 1]))
group_output = bn
else:
group_output = conv_output
pool = img_pool_layer(
name="pool%d" % prefix_num,
input=group_output,
pool_size=2,
num_channels=output_channels,
stride=2)
return pool
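
# An illustrative call building one VGG-style group (two 3x3 convolutions
# with batch normalization on the second, followed by 2x2 pooling); the
# input layer name and channel counts below are assumptions:
#
#   pool1 = conv_layer_group(
#       prefix_num=1,
#       num_layers=2,
#       input=data_input,
#       input_channels=3,
#       output_channels=64,
#       with_bn=[False, True])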
def vgg_conv_net(image_size,
num_classes,
num_layers,
channels,
strides,
with_bn,
fc_dims,
drop_rates,
drop_rates_fc=[],
is_color=True,
is_predict=False):
"""
    A wrapper for a VGG network for image classification.
    It is a set of convolutional groups followed by several fully
    connected layers, and a cross-entropy classification loss.
    The detailed architecture can be found in the paper:
Very Deep Convolutional Networks for Large-Scale Visual Recognition
http://www.robots.ox.ac.uk/~vgg/research/very_deep/
image_size: image size.
num_classes: num of classes.
    num_layers: the number of layers for each convolution group.
    channels: the number of output filters for each convolution group.
    strides: the convolution strides for each convolution group.
    with_bn: whether each layer of a convolution group is followed by
        batch normalization.
    drop_rates: the dropout rates for all the convolutional layers.
    fc_dims: the dimensions of all the fully connected layers.
    is_color: whether the input images are color.
"""
data_input, label_input, num_image_channels = \
image_data_layers(image_size, num_classes, is_color, is_predict)
assert (len(num_layers) == len(channels))
assert (len(num_layers) == len(strides))
assert (len(num_layers) == len(with_bn))
num_fc_layers = len(fc_dims)
assert (num_fc_layers + 1 == len(drop_rates_fc))
for i in range(len(num_layers)):
input_layer = data_input if i == 0 else group_output
input_channels = 3 if i == 0 else channels[i - 1]
group_output = conv_layer_group(
prefix_num=i + 1,
num_layers=num_layers[i],
input=input_layer,
input_channels=input_channels,
output_channels=channels[i],
drop_rates=drop_rates[i],
strides=strides[i],
with_bn=with_bn[i])
conv_output_name = group_output
if drop_rates_fc[0] != 0.0:
dropped_pool_name = "pool_dropped"
conv_output_name = dropout_layer(
name=dropped_pool_name,
input=conv_output_name,
dropout_rate=drop_rates_fc[0])
for i in range(len(fc_dims)):
input_layer_name = conv_output_name if i == 0 else fc_output
active_type = LinearActivation() if i == len(
fc_dims) - 1 else ReluActivation()
drop_rate = 0.0 if i == len(fc_dims) - 1 else drop_rates_fc[i + 1]
fc_output = fc_layer(
name="fc%d" % (i + 1),
input=input_layer_name,
size=fc_dims[i],
act=active_type,
layer_attr=get_extra_layer_attr(drop_rate))
bn = batch_norm_layer(
name="fc_bn",
input=fc_output,
num_channels=fc_dims[len(fc_dims) - 1],
act=ReluActivation(),
layer_attr=get_extra_layer_attr(drop_rate=drop_rates_fc[-1]))
output = fc_layer(
name="output", input=bn, size=num_classes, act=SoftmaxActivation())
if is_predict:
outputs(output)
else:
cost = classification_cost(name="cost", input=output, label=label_input)
outputs(cost)
def vgg16_conv_net(image_size, num_classes, is_color=True, is_predict=False):
"""
    A wrapper for a 16-layer VGG network for image classification.
    The detailed architecture can be found in the paper:
Very Deep Convolutional Networks for Large-Scale Visual Recognition
http://www.robots.ox.ac.uk/~vgg/research/very_deep/
image_size: image size.
num_classes: num of classes.
is_color: whether the input images are color.
"""
vgg_conv_net(image_size, num_classes,
num_layers=[2, 2, 3, 3, 3],
channels=[64, 128, 256, 512, 512],
strides=[[], [], [], [], []],
with_bn=[[False, True], [False, True], [False, False, True], \
[False, False, True], [False, False, True]],
drop_rates=[[]] * 5,
drop_rates_fc=[0.0, 0.5, 0.5],
fc_dims=[4096, 4096],
is_predict=is_predict)
def small_vgg(data_conf, is_predict=False):
"""
    A wrapper for a small VGG network for CIFAR-10 image classification.
    The detailed architecture can be found at:
92.45% on CIFAR-10 in Torch
http://torch.ch/blog/2015/07/30/cifar.html
Due to the constraints of CuDNN, it only has four convolutional groups
rather than five.
Thus, it only achieves 91.2% test accuracy and 98.1% training accuracy.
data_conf is a dictionary with the following keys:
image_size: image size.
num_classes: num of classes.
is_color: whether the input images are color.
"""
for k, v in six.iteritems(data_conf):
globals()[k] = v
vgg_conv_net(image_size, num_classes,
num_layers=[2, 2, 3, 3],
channels=[64, 128, 256, 512],
strides=[[], [], [], []],
with_bn=[[True, True], [True, True], [True, True, True], \
[True, True, True]],
drop_rates=[[0.3, 0.0], [0.4, 0.0],
[0.4, 0.4, 0.0], [0.4, 0.4, 0.0]],
drop_rates_fc=[0.5, 0.5],
fc_dims=[512],
is_predict=is_predict)
def training_settings(learning_rate=0.1,
batch_size=128,
algorithm="sgd",
momentum=0.9,
decay_rate=0.001):
"""
Training settings.
learning_rate: learning rate of the training.
batch_size: the size of each training batch.
algorithm: training algorithm, can be
- sgd
- adagrad
- adadelta
- rmsprop
momentum: momentum of the training algorithm.
decay_rate: weight decay rate.
"""
Settings(
algorithm=algorithm,
batch_size=batch_size,
learning_rate=learning_rate / float(batch_size))
default_momentum(momentum)
default_decay_rate(decay_rate * batch_size)
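
# A minimal trainer-config sketch combining the wrappers above (the data
# directory and hyper-parameters are illustrative):
#
#   data_conf = image_data(data_dir='./cifar_batches', processed_image_size=32)
#   training_settings(learning_rate=0.1, batch_size=128, algorithm='sgd')
#   small_vgg(data_conf, is_predict=False)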