diff --git a/python/paddle/utils/dump_config.py b/python/paddle/utils/dump_config.py deleted file mode 100644 index 6a96a0a78fc77c50904ee7822c725c41e646c5e6..0000000000000000000000000000000000000000 --- a/python/paddle/utils/dump_config.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer.config_parser import parse_config -from paddle.proto import TrainerConfig_pb2 -import sys - -__all__ = [] - -if __name__ == '__main__': - whole_conf = False - binary = False - if len(sys.argv) == 2: - conf = parse_config(sys.argv[1], '') - elif len(sys.argv) == 3: - conf = parse_config(sys.argv[1], sys.argv[2]) - elif len(sys.argv) == 4: - conf = parse_config(sys.argv[1], sys.argv[2]) - if sys.argv[3] == '--whole': - whole_conf = True - elif sys.argv[3] == '--binary': - binary = True - else: - raise RuntimeError() - - assert isinstance(conf, TrainerConfig_pb2.TrainerConfig) - - if whole_conf: - print(conf) - else: - if binary: - sys.stdout.write(conf.model_config.SerializeToString()) - else: - print(conf.model_config) diff --git a/python/paddle/utils/dump_v2_config.py b/python/paddle/utils/dump_v2_config.py deleted file mode 100644 index 5dc2111e379fd39b40e1e9bcf2e577b57b101a68..0000000000000000000000000000000000000000 --- a/python/paddle/utils/dump_v2_config.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import collections - -from paddle.trainer_config_helpers.layers import LayerOutput -from paddle.v2.layer import parse_network -from paddle.proto import TrainerConfig_pb2 - -__all__ = ["dump_v2_config"] - - -def dump_v2_config(topology, save_path, binary=False): - """ Dump the network topology to a specified file. - - This function is only used to dump network defined by using PaddlePaddle V2 - APIs. This function will NOT dump configurations related to PaddlePaddle - optimizer. - - :param topology: The output layers (can be more than one layers given in a - Python List or Tuple) of the entire network. Using the - specified layers (if more than one layer is given) as root, - traversing back to the data layer(s), all the layers - connected to the specified output layers will be dumped. - Layers not connceted to the specified will not be dumped. - :type topology: LayerOutput|List|Tuple - :param save_path: The path to save the dumped network topology. - :type save_path: str - :param binary: Whether to dump the serialized network topology or not. - The default value is false. NOTE that, if you call this - function to generate network topology for PaddlePaddle C-API, - a serialized version of network topology is required. When - using PaddlePaddle C-API, this flag MUST be set to True. - :type binary: bool - """ - - if isinstance(topology, LayerOutput): - topology = [topology] - elif isinstance(topology, collections.Sequence): - for out_layer in topology: - assert isinstance(out_layer, LayerOutput), ( - "The type of each element in the parameter topology " - "should be LayerOutput.") - else: - raise RuntimeError("Error input type for parameter topology.") - - model_str = parse_network(topology) - with open(save_path, "w") as fout: - if binary: - fout.write(model_str.SerializeToString()) - else: - fout.write(str(model_str)) diff --git a/python/paddle/utils/image_multiproc.py b/python/paddle/utils/image_multiproc.py deleted file mode 100644 index d1bbda3fd3562efe486377d41a9fb7359bafa4e7..0000000000000000000000000000000000000000 --- a/python/paddle/utils/image_multiproc.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os, sys -import numpy as np -from PIL import Image -import six -from six.moves import cStringIO as StringIO -import multiprocessing -import functools -import itertools - -from paddle.utils.image_util import * -from paddle.trainer.config_parser import logger - -try: - import cv2 -except ImportError: - logger.warning("OpenCV2 is not installed, using PIL to process") - cv2 = None - -__all__ = ["CvTransformer", "PILTransformer", "MultiProcessImageTransformer"] - - -class CvTransformer(ImageTransformer): - """ - CvTransformer used python-opencv to process image. - """ - - def __init__( - self, - min_size=None, - crop_size=None, - transpose=(2, 0, 1), # transpose to C * H * W - channel_swap=None, - mean=None, - is_train=True, - is_color=True): - ImageTransformer.__init__(self, transpose, channel_swap, mean, is_color) - self.min_size = min_size - self.crop_size = crop_size - self.is_train = is_train - - def resize(self, im, min_size): - row, col = im.shape[:2] - new_row, new_col = min_size, min_size - if row > col: - new_row = min_size * row / col - else: - new_col = min_size * col / row - im = cv2.resize(im, (new_row, new_col), interpolation=cv2.INTER_CUBIC) - return im - - def crop_and_flip(self, im): - """ - Return cropped image. - The size of the cropped image is inner_size * inner_size. - im: (H x W x K) ndarrays - """ - row, col = im.shape[:2] - start_h, start_w = 0, 0 - if self.is_train: - start_h = np.random.randint(0, row - self.crop_size + 1) - start_w = np.random.randint(0, col - self.crop_size + 1) - else: - start_h = (row - self.crop_size) / 2 - start_w = (col - self.crop_size) / 2 - end_h, end_w = start_h + self.crop_size, start_w + self.crop_size - if self.is_color: - im = im[start_h:end_h, start_w:end_w, :] - else: - im = im[start_h:end_h, start_w:end_w] - if (self.is_train) and (np.random.randint(2) == 0): - if self.is_color: - im = im[:, ::-1, :] - else: - im = im[:, ::-1] - return im - - def transform(self, im): - im = self.resize(im, self.min_size) - im = self.crop_and_flip(im) - # transpose, swap channel, sub mean - im = im.astype('float32') - ImageTransformer.transformer(self, im) - return im - - def load_image_from_string(self, data): - flag = cv2.CV_LOAD_IMAGE_COLOR if self.is_color else cv2.CV_LOAD_IMAGE_GRAYSCALE - im = cv2.imdecode(np.fromstring(data, np.uint8), flag) - return im - - def transform_from_string(self, data): - im = self.load_image_from_string(data) - return self.transform(im) - - def load_image_from_file(self, file): - flag = cv2.CV_LOAD_IMAGE_COLOR if self.is_color else cv2.CV_LOAD_IMAGE_GRAYSCALE - im = cv2.imread(file, flag) - return im - - def transform_from_file(self, file): - im = self.load_image_from_file(file) - return self.transform(im) - - -class PILTransformer(ImageTransformer): - """ - PILTransformer used PIL to process image. - """ - - def __init__( - self, - min_size=None, - crop_size=None, - transpose=(2, 0, 1), # transpose to C * H * W - channel_swap=None, - mean=None, - is_train=True, - is_color=True): - ImageTransformer.__init__(self, transpose, channel_swap, mean, is_color) - self.min_size = min_size - self.crop_size = crop_size - self.is_train = is_train - - def resize(self, im, min_size): - row, col = im.size[:2] - new_row, new_col = min_size, min_size - if row > col: - new_row = min_size * row / col - else: - new_col = min_size * col / row - im = im.resize((new_row, new_col), Image.ANTIALIAS) - return im - - def crop_and_flip(self, im): - """ - Return cropped image. - The size of the cropped image is inner_size * inner_size. - """ - row, col = im.size[:2] - start_h, start_w = 0, 0 - if self.is_train: - start_h = np.random.randint(0, row - self.crop_size + 1) - start_w = np.random.randint(0, col - self.crop_size + 1) - else: - start_h = (row - self.crop_size) / 2 - start_w = (col - self.crop_size) / 2 - end_h, end_w = start_h + self.crop_size, start_w + self.crop_size - im = im.crop((start_h, start_w, end_h, end_w)) - if (self.is_train) and (np.random.randint(2) == 0): - im = im.transpose(Image.FLIP_LEFT_RIGHT) - return im - - def transform(self, im): - im = self.resize(im, self.min_size) - im = self.crop_and_flip(im) - im = np.array(im, dtype=np.float32) # convert to numpy.array - # transpose, swap channel, sub mean - ImageTransformer.transformer(self, im) - return im - - def load_image_from_string(self, data): - im = Image.open(StringIO(data)) - return im - - def transform_from_string(self, data): - im = self.load_image_from_string(data) - return self.transform(im) - - def load_image_from_file(self, file): - im = Image.open(file) - return im - - def transform_from_file(self, file): - im = self.load_image_from_file(file) - return self.transform(im) - - -def job(is_img_string, transformer, data_label_pack): - (data, label) = data_label_pack - if is_img_string: - return transformer.transform_from_string(data), label - else: - return transformer.transform_from_file(data), label - - -class MultiProcessImageTransformer(object): - def __init__(self, - procnum=10, - resize_size=None, - crop_size=None, - transpose=(2, 0, 1), - channel_swap=None, - mean=None, - is_train=True, - is_color=True, - is_img_string=True): - """ - Processing image with multi-process. If it is used in PyDataProvider, - the simple usage for CNN is as follows: - - .. code-block:: python - - def hool(settings, is_train, **kwargs): - settings.is_train = is_train - settings.mean_value = np.array([103.939,116.779,123.68], dtype=np.float32) - settings.input_types = [ - dense_vector(3 * 224 * 224), - integer_value(1)] - settings.transformer = MultiProcessImageTransformer( - procnum=10, - resize_size=256, - crop_size=224, - transpose=(2, 0, 1), - mean=settings.mean_values, - is_train=settings.is_train) - - - @provider(init_hook=hook, pool_size=20480) - def process(settings, file_list): - with open(file_list, 'r') as fdata: - for line in fdata: - data_dic = np.load(line.strip()) # load the data batch pickled by Pickle. - data = data_dic['data'] - labels = data_dic['label'] - labels = np.array(labels, dtype=np.float32) - for im, lab in settings.dp.run(data, labels): - yield [im.astype('float32'), int(lab)] - - :param procnum: processor number. - :type procnum: int - :param resize_size: the shorter edge size of image after resizing. - :type resize_size: int - :param crop_size: the croping size. - :type crop_size: int - :param transpose: the transpose order, Paddle only allow C * H * W order. - :type transpose: tuple or list - :param channel_swap: the channel swap order, RGB or BRG. - :type channel_swap: tuple or list - :param mean: the mean values of image, per-channel mean or element-wise mean. - :type mean: array, The dimension is 1 for per-channel mean. - The dimension is 3 for element-wise mean. - :param is_train: training peroid or testing peroid. - :type is_train: bool. - :param is_color: the image is color or gray. - :type is_color: bool. - :param is_img_string: The input can be the file name of image or image string. - :type is_img_string: bool. - """ - - self.procnum = procnum - self.pool = multiprocessing.Pool(procnum) - self.is_img_string = is_img_string - if cv2 is not None: - self.transformer = CvTransformer(resize_size, crop_size, transpose, - channel_swap, mean, is_train, - is_color) - else: - self.transformer = PILTransformer(resize_size, crop_size, transpose, - channel_swap, mean, is_train, - is_color) - - def run(self, data, label): - fun = functools.partial(job, self.is_img_string, self.transformer) - return self.pool.imap_unordered( - fun, six.moves.zip(data, label), chunksize=100 * self.procnum) diff --git a/python/paddle/utils/make_model_diagram.py b/python/paddle/utils/make_model_diagram.py deleted file mode 100644 index 52759d3ad230c3a5a5488a8bc46a2e8f8fae1025..0000000000000000000000000000000000000000 --- a/python/paddle/utils/make_model_diagram.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Generate dot diagram file for the given paddle model config -# The generated file can be viewed using Graphviz (http://graphviz.org) - -from __future__ import print_function - -import six -import sys -import traceback - -from paddle.trainer.config_parser import parse_config - - -def make_layer_label(layer_config): - label = '%s type=%s' % (layer_config.name, layer_config.type) - if layer_config.reversed: - label += ' <==' - - label2 = '' - if layer_config.active_type: - label2 += 'act=%s ' % layer_config.active_type - if layer_config.bias_parameter_name: - label2 += 'bias=%s ' % layer_config.bias_parameter_name - - if label2: - label += '\l' + label2 - return label - - -def make_diagram(config_file, dot_file, config_arg_str): - config = parse_config(config_file, config_arg_str) - make_diagram_from_proto(config.model_config, dot_file) - - -def make_diagram_from_proto(model_config, dot_file): - # print >> sys.stderr, config - name2id = {} - f = open(dot_file, 'w') - submodel_layers = set() - - def make_link(link): - return 'l%s -> l%s;' % (name2id[link.layer_name], - name2id[link.link_name]) - - def make_mem(mem): - s = '' - if mem.boot_layer_name: - s += 'l%s -> l%s;\n' % (name2id[mem.boot_layer_name], - name2id[mem.layer_name]) - s += 'l%s -> l%s [style=dashed];' % (name2id[mem.layer_name], - name2id[mem.link_name]) - return s - - print('digraph graphname {', file=f) - print('node [width=0.375,height=0.25];', file=f) - for i in six.moves.xrange(len(model_config.layers)): - l = model_config.layers[i] - name2id[l.name] = i - - i = 0 - for sub_model in model_config.sub_models: - if sub_model.name == 'root': - continue - print('subgraph cluster_%s {' % i, file=f) - print('style=dashed;', file=f) - label = '%s ' % sub_model.name - if sub_model.reversed: - label += '<==' - print('label = "%s";' % label, file=f) - i += 1 - submodel_layers.add(sub_model.name) - for layer_name in sub_model.layer_names: - submodel_layers.add(layer_name) - lid = name2id[layer_name] - layer_config = model_config.layers[lid] - label = make_layer_label(layer_config) - print('l%s [label="%s", shape=box];' % (lid, label), file=f) - print('}', file=f) - - for i in six.moves.xrange(len(model_config.layers)): - l = model_config.layers[i] - if l.name not in submodel_layers: - label = make_layer_label(l) - print('l%s [label="%s", shape=box];' % (i, label), file=f) - - for sub_model in model_config.sub_models: - if sub_model.name == 'root': - continue - for link in sub_model.in_links: - print(make_link(link), file=f) - for link in sub_model.out_links: - print(make_link(link), file=f) - for mem in sub_model.memories: - print(make_mem(mem), file=f) - - for i in six.moves.xrange(len(model_config.layers)): - for l in model_config.layers[i].inputs: - print( - 'l%s -> l%s [label="%s"];' % (name2id[l.input_layer_name], i, - l.input_parameter_name), - file=f) - - print('}', file=f) - f.close() - - -def usage(): - print( - ("Usage: python show_model_diagram.py" + - " CONFIG_FILE DOT_FILE [config_str]"), - file=sys.stderr) - exit(1) - - -if __name__ == '__main__': - if len(sys.argv) < 3 or len(sys.argv) > 4: - usage() - - config_file = sys.argv[1] - dot_file = sys.argv[2] - config_arg_str = sys.argv[3] if len(sys.argv) == 4 else '' - - try: - make_diagram(config_file, dot_file, config_arg_str) - except: - traceback.print_exc() - raise diff --git a/python/paddle/utils/merge_model.py b/python/paddle/utils/merge_model.py deleted file mode 100644 index b74649e93640c3600636034d58792b8d12dffeda..0000000000000000000000000000000000000000 --- a/python/paddle/utils/merge_model.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import gzip -import struct -import os - -from paddle.trainer_config_helpers.layers import LayerOutput -from paddle.v2.parameters import Parameters -from paddle.proto import ModelConfig_pb2 -from paddle.v2.topology import Topology - - -def merge_v2_model(net, param_file, output_file): - '''Merge the model config and parameters into one file. - - The model configuration file describes the model structure which - ends with .py. The parameters file stores the parameters of the model - which ends with .tar.gz. - - @param net The output layer of the network for inference. - @param param_file Path of the parameters (.tar.gz) which is stored by - v2 api. - @param output_file Path of the merged file which will be generated. - - Usage: - - from paddle.utils.merge_model import merge_v2_model - # import your network configuration - from example_net import net_conf - - net = net_conf(is_predict=True) - param_file = './param_pass_00000.tar.gz' - output_file = './output.paddle' - - merge_v2_model(net, param_file, output_file) - - ''' - - assert isinstance(net, LayerOutput), \ - "The net should be the output of the network for inference" - assert os.path.exists(param_file), \ - "The model parameters file %s does not exists " % (param_file) - - model_proto = Topology(net).proto() - assert isinstance(model_proto, ModelConfig_pb2.ModelConfig) - - with gzip.open(param_file) as f: - params = Parameters.from_tar(f) - - if os.path.exists(output_file): - os.remove(output_file) - - with open(output_file, 'w') as f: - param_names = [param.name for param in model_proto.parameters] - conf_str = model_proto.SerializeToString() - f.write(struct.pack('q', len(conf_str))) - f.write(conf_str) - for pname in param_names: - params.serialize(pname, f) - - print('Generate %s success!' % (output_file)) diff --git a/python/paddle/utils/predefined_net.py b/python/paddle/utils/predefined_net.py deleted file mode 100644 index 2801f4877c079615239b92be146b3e33df16b37f..0000000000000000000000000000000000000000 --- a/python/paddle/utils/predefined_net.py +++ /dev/null @@ -1,381 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import six -import os -from paddle.trainer.config_parser import * -from paddle.utils.preprocess_img import \ - ImageClassificationDatasetCreater -from paddle.trainer_config_helpers import * - - -def image_data(data_dir, - processed_image_size, - overwrite=False, - color=True, - train_list="batches/train.list", - test_list="batches/test.list", - meta_file="batches/batches.meta", - use_jpeg=1): - """ - Predefined image data provider for image classification. - train_list: a text file containing a list of training batches. - test_list: a text file containing a list of test batches. - processed_image_size: all the input images will be resized into this size. - If the image is not square. Then the shorter edge will be resized into - this size, and the aspect ratio is kept the same. - color: whether the images are color or gray. - meta_path: the path of the meta file that stores the mean image file and - other dataset information, such as the size of images, - the size of the mean image, the number of classes. - async_load_data: whether to load image data asynchronuously. - """ - data_creator = ImageClassificationDatasetCreater( - data_dir, processed_image_size, color) - batch_data_dir = data_dir - train_list = os.path.join(batch_data_dir, train_list) - test_list = os.path.join(batch_data_dir, test_list) - meta_path = os.path.join(batch_data_dir, meta_file) - image_size = processed_image_size - conf = np.load(meta_path) - mean_image_size = conf["mean_image_size"] - is_color = conf["color"] - num_classes = conf["num_classes"] - color_string = "color" if is_color else "gray" - - args = { - 'meta': meta_path, - 'mean_img_size': mean_image_size, - 'img_size': image_size, - 'num_classes': num_classes, - 'use_jpeg': use_jpeg != 0, - 'color': color_string - } - - define_py_data_sources2( - train_list, - test_list, - module='image_provider', - obj='processData', - args=args) - return { - "image_size": image_size, - "num_classes": num_classes, - "is_color": is_color - } - - -def get_extra_layer_attr(drop_rate): - if drop_rate == 0: - return None - else: - return ExtraLayerAttribute(drop_rate=drop_rate) - - -def image_data_layers(image_size, num_classes, is_color=False, - is_predict=False): - """ - Data layers for image classification. - image_size: image size. - num_classes: num of classes. - is_color: whether the input images are color. - is_predict: whether the network is used for prediction. - """ - num_image_channels = 3 if is_color else 1 - data_input = data_layer("input", - image_size * image_size * num_image_channels) - if is_predict: - return data_input, None, num_image_channels - else: - label_input = data_layer("label", 1) - return data_input, label_input, num_image_channels - - -def simple_conv_net(data_conf, is_color=False): - """ - A Wrapper for a simple network for MNIST digit recognition. - It contains two convolutional layers, one fully conencted layer, and - one softmax layer. - data_conf is a dictionary with the following keys: - image_size: image size. - num_classes: num of classes. - is_color: whether the input images are color. - """ - for k, v in six.iteritems(data_conf): - globals()[k] = v - data_input, label_input, num_image_channels = \ - image_data_layers(image_size, num_classes, is_color, is_predict) - filter_sizes = [5, 5] - num_channels = [32, 64] - strides = [1, 1] - fc_dims = [500] - conv_bn_pool1 = img_conv_bn_pool( - name="g1", - input=data_input, - filter_size=filter_sizes[0], - num_channel=num_image_channels, - num_filters=num_channels[0], - conv_stride=1, - conv_padding=0, - pool_size=3, - pool_stride=2, - act=ReluActivation()) - conv_bn_pool2 = img_conv_bn_pool( - name="g2", - input=conv_bn_pool1, - filter_size=filter_sizes[1], - num_channel=num_channels[0], - num_filters=num_channels[1], - conv_stride=1, - conv_padding=0, - pool_size=3, - pool_stride=2, - act=ReluActivation()) - fc3 = fc_layer( - name="fc3", input=conv_bn_pool2, dim=fc_dims[0], act=ReluActivation()) - fc3_dropped = dropout_layer(name="fc3_dropped", input=fc3, dropout_rate=0.5) - output = fc_layer( - name="output", - input=fc3_dropped, - dim=fc_dims[0], - act=SoftmaxActivation()) - if is_predict: - end_of_network(output) - else: - cost = classify(name="cost", input=output, label=label_input) - end_of_network(cost) - - -def conv_layer_group(prefix_num, - num_layers, - input, - input_channels, - output_channels, - drop_rates=[], - strides=[], - with_bn=[]): - """ - A set of convolution layers, and batch normalization layers, - followed by one pooling layer. - It is utilized in VGG network for image classifcation. - prefix_num: the prefix number of the layer names. - For example, if prefix_num = 1, the first convolutioal layer's - name will be conv_1_1. - num_layers: number of the convolutional layers. - input: the name of the input layer. - input_channels: the number of channels of the input feature map. - output_channels: the number of channels of the output feature map. - drop_rates: the drop rates of the BN layers. It will be all zero by default. - strides: the stride of the convolution for the layers. - It will be all 1 by default. - with_bn: whether to use Batch Normalization for Conv layers. - By default, it is all false. - """ - if len(drop_rates) == 0: drop_rates = [0] * num_layers - if len(strides) == 0: strides = [1] * num_layers - if len(with_bn) == 0: with_bn = [False] * num_layers - assert (len(drop_rates) == num_layers) - assert (len(strides) == num_layers) - - for i in range(1, num_layers + 1): - if i == 1: - i_conv_in = input - else: - i_conv_in = group_output - i_channels_conv = input_channels if i == 1 else output_channels - conv_act = LinearActivation() if with_bn[i - 1] else ReluActivation() - conv_output = img_conv_layer( - name="conv%d_%d" % (prefix_num, i), - input=i_conv_in, - filter_size=3, - num_channels=i_channels_conv, - num_filters=output_channels, - stride=strides[i - 1], - padding=1, - act=conv_act) - if with_bn[i - 1]: - bn = batch_norm_layer( - name="conv%d_%d_bn" % (prefix_num, i), - input=conv_output, - num_channels=output_channels, - act=ReluActivation(), - layer_attr=get_extra_layer_attr(drop_rate=drop_rates[i - 1])) - group_output = bn - else: - group_output = conv_output - pool = img_pool_layer( - name="pool%d" % prefix_num, - input=group_output, - pool_size=2, - num_channels=output_channels, - stride=2) - return pool - - -def vgg_conv_net(image_size, - num_classes, - num_layers, - channels, - strides, - with_bn, - fc_dims, - drop_rates, - drop_rates_fc=[], - is_color=True, - is_predict=False): - """ - A Wrapper for a VGG network for image classification. - It is a set of convolutional groups followed by several fully - connected layers, and a cross-entropy classifiation loss. - The detailed architecture of the paper can be found here: - Very Deep Convolutional Networks for Large-Scale Visual Recognition - http://www.robots.ox.ac.uk/~vgg/research/very_deep/ - image_size: image size. - num_classes: num of classes. - num_layers: the number of layers for all the convolution groups. - channels: the number of output filters for all the convolution groups. - with_bn: whether each layer of a convolution group is followed by a - batch normalization. - drop_rates: the dropout rates for all the convolutional layers. - fc_dims: the dimension for all the fully connected layers. - is_color: whether the input images are color. - """ - data_input, label_input, num_image_channels = \ - image_data_layers(image_size, num_classes, is_color, is_predict) - assert (len(num_layers) == len(channels)) - assert (len(num_layers) == len(strides)) - assert (len(num_layers) == len(with_bn)) - num_fc_layers = len(fc_dims) - assert (num_fc_layers + 1 == len(drop_rates_fc)) - - for i in range(len(num_layers)): - input_layer = data_input if i == 0 else group_output - input_channels = 3 if i == 0 else channels[i - 1] - group_output = conv_layer_group( - prefix_num=i + 1, - num_layers=num_layers[i], - input=input_layer, - input_channels=input_channels, - output_channels=channels[i], - drop_rates=drop_rates[i], - strides=strides[i], - with_bn=with_bn[i]) - conv_output_name = group_output - if drop_rates_fc[0] != 0.0: - dropped_pool_name = "pool_dropped" - conv_output_name = dropout_layer( - name=dropped_pool_name, - input=conv_output_name, - dropout_rate=drop_rates_fc[0]) - for i in range(len(fc_dims)): - input_layer_name = conv_output_name if i == 0 else fc_output - active_type = LinearActivation() if i == len( - fc_dims) - 1 else ReluActivation() - drop_rate = 0.0 if i == len(fc_dims) - 1 else drop_rates_fc[i + 1] - fc_output = fc_layer( - name="fc%d" % (i + 1), - input=input_layer_name, - size=fc_dims[i], - act=active_type, - layer_attr=get_extra_layer_attr(drop_rate)) - bn = batch_norm_layer( - name="fc_bn", - input=fc_output, - num_channels=fc_dims[len(fc_dims) - 1], - act=ReluActivation(), - layer_attr=get_extra_layer_attr(drop_rate=drop_rates_fc[-1])) - output = fc_layer( - name="output", input=bn, size=num_classes, act=SoftmaxActivation()) - if is_predict: - outputs(output) - else: - cost = classification_cost(name="cost", input=output, label=label_input) - outputs(cost) - - -def vgg16_conv_net(image_size, num_classes, is_color=True, is_predict=False): - """ - A Wrapper for a 16 layers VGG network for image classification. - The detailed architecture of the paper can be found here: - Very Deep Convolutional Networks for Large-Scale Visual Recognition - http://www.robots.ox.ac.uk/~vgg/research/very_deep/ - image_size: image size. - num_classes: num of classes. - is_color: whether the input images are color. - """ - vgg_conv_net(image_size, num_classes, - num_layers=[2, 2, 3, 3, 3], - channels=[64, 128, 256, 512, 512], - strides=[[], [], [], [], []], - with_bn=[[False, True], [False, True], [False, False, True], \ - [False, False, True], [False, False, True]], - drop_rates=[[]] * 5, - drop_rates_fc=[0.0, 0.5, 0.5], - fc_dims=[4096, 4096], - is_predict=is_predict) - - -def small_vgg(data_conf, is_predict=False): - """ - A Wrapper for a small VGG network for CIFAR-10 image classification. - The detailed architecture of the paper can be found here: - 92.45% on CIFAR-10 in Torch - http://torch.ch/blog/2015/07/30/cifar.html - Due to the constraints of CuDNN, it only has four convolutional groups - rather than five. - Thus, it only achieves 91.2% test accuracy and 98.1% training accuracy. - data_conf is a dictionary with the following keys: - image_size: image size. - num_classes: num of classes. - is_color: whether the input images are color. - """ - for k, v in six.iteritems(data_conf): - globals()[k] = v - vgg_conv_net(image_size, num_classes, - num_layers=[2, 2, 3, 3], - channels=[64, 128, 256, 512], - strides=[[], [], [], []], - with_bn=[[True, True], [True, True], [True, True, True], \ - [True, True, True]], - drop_rates=[[0.3, 0.0], [0.4, 0.0], - [0.4, 0.4, 0.0], [0.4, 0.4, 0.0]], - drop_rates_fc=[0.5, 0.5], - fc_dims=[512], - is_predict=is_predict) - - -def training_settings(learning_rate=0.1, - batch_size=128, - algorithm="sgd", - momentum=0.9, - decay_rate=0.001): - """ - Training settings. - learning_rate: learning rate of the training. - batch_size: the size of each training batch. - algorithm: training algorithm, can be - - sgd - - adagrad - - adadelta - - rmsprop - momentum: momentum of the training algorithm. - decay_rate: weight decay rate. - """ - Settings( - algorithm=algorithm, - batch_size=batch_size, - learning_rate=learning_rate / float(batch_size)) - default_momentum(momentum) - default_decay_rate(decay_rate * batch_size)