提交 c3fe50bc 编写于 作者: D dangqingqing

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into srl_api_v2

import numpy
import paddle.v2 as paddle import paddle.v2 as paddle
import mnist_util import mnist_util
...@@ -40,17 +39,14 @@ def main(): ...@@ -40,17 +39,14 @@ def main():
trainer = paddle.trainer.SGD(update_equation=adam_optimizer) trainer = paddle.trainer.SGD(update_equation=adam_optimizer)
trainer.train(train_data_reader=train_reader, trainer.train(
topology=cost, train_data_reader=train_reader,
parameters=parameters, cost=cost,
event_handler=event_handler, parameters=parameters,
batch_size=32, # batch size should be refactor in Data reader event_handler=event_handler,
data_types=[ # data_types will be removed, It should be in batch_size=32, # batch size should be refactor in Data reader
# network topology reader_dict={images.name: 0,
('pixel', images.type), label.name: 1})
('label', label.type)],
reader_dict={'pixel':0, 'label':1}
)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -139,24 +139,12 @@ lstmemory ...@@ -139,24 +139,12 @@ lstmemory
:members: lstmemory :members: lstmemory
:noindex: :noindex:
lstm_step_layer
---------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: lstm_step_layer
:noindex:
grumemory grumemory
--------- ---------
.. automodule:: paddle.trainer_config_helpers.layers .. automodule:: paddle.trainer_config_helpers.layers
:members: grumemory :members: grumemory
:noindex: :noindex:
gru_step_layer
---------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: gru_step_layer
:noindex:
Recurrent Layer Group Recurrent Layer Group
===================== =====================
...@@ -172,6 +160,18 @@ recurrent_group ...@@ -172,6 +160,18 @@ recurrent_group
:members: recurrent_group :members: recurrent_group
:noindex: :noindex:
lstm_step_layer
---------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: lstm_step_layer
:noindex:
gru_step_layer
---------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: gru_step_layer
:noindex:
beam_search beam_search
------------ ------------
.. automodule:: paddle.trainer_config_helpers.layers .. automodule:: paddle.trainer_config_helpers.layers
...@@ -308,6 +308,12 @@ repeat_layer ...@@ -308,6 +308,12 @@ repeat_layer
:members: repeat_layer :members: repeat_layer
:noindex: :noindex:
rotate_layer
------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: rotate_layer
:noindex:
seq_reshape_layer seq_reshape_layer
----------------- -----------------
.. automodule:: paddle.trainer_config_helpers.layers .. automodule:: paddle.trainer_config_helpers.layers
...@@ -462,6 +468,12 @@ ctc_layer ...@@ -462,6 +468,12 @@ ctc_layer
:members: ctc_layer :members: ctc_layer
:noindex: :noindex:
warp_ctc_layer
--------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: warp_ctc_layer
:noindex:
nce_layer nce_layer
----------- -----------
.. automodule:: paddle.trainer_config_helpers.layers .. automodule:: paddle.trainer_config_helpers.layers
......
...@@ -112,6 +112,7 @@ __all__ = [ ...@@ -112,6 +112,7 @@ __all__ = [
'priorbox_layer', 'priorbox_layer',
'spp_layer', 'spp_layer',
'pad_layer', 'pad_layer',
'eos_layer',
'layer_support', 'layer_support',
] ]
...@@ -1289,6 +1290,12 @@ def last_seq(input, ...@@ -1289,6 +1290,12 @@ def last_seq(input,
""" """
Get Last Timestamp Activation of a sequence. Get Last Timestamp Activation of a sequence.
The simple usage is:
.. code-block:: python
seq = last_seq(input=layer)
:param agg_level: Aggregated level :param agg_level: Aggregated level
:param name: Layer name. :param name: Layer name.
:type name: basestring :type name: basestring
...@@ -1327,6 +1334,12 @@ def first_seq(input, ...@@ -1327,6 +1334,12 @@ def first_seq(input,
""" """
Get First Timestamp Activation of a sequence. Get First Timestamp Activation of a sequence.
The simple usage is:
.. code-block:: python
seq = first_seq(input=layer)
:param agg_level: aggregation level :param agg_level: aggregation level
:param name: Layer name. :param name: Layer name.
:type name: basestring :type name: basestring
...@@ -1427,7 +1440,7 @@ def repeat_layer(input, num_repeats, name=None, layer_attr=None): ...@@ -1427,7 +1440,7 @@ def repeat_layer(input, num_repeats, name=None, layer_attr=None):
.. code-block:: python .. code-block:: python
expand = repeat_layer(layer, 4) expand = repeat_layer(input=layer, num_repeats=4)
:param input: Input layer :param input: Input layer
:type input: LayerOutput :type input: LayerOutput
...@@ -1799,6 +1812,12 @@ def cos_sim(a, b, scale=1, size=1, name=None, layer_attr=None): ...@@ -1799,6 +1812,12 @@ def cos_sim(a, b, scale=1, size=1, name=None, layer_attr=None):
Note that the above computation is for one sample. Multiple samples are Note that the above computation is for one sample. Multiple samples are
processed in one batch. processed in one batch.
The example usage is:
.. code-block:: python
cos = cos_sim(a=layer1, b=layer2, size=3)
:param name: layer name :param name: layer name
:type name: basestring :type name: basestring
:param a: input layer a :param a: input layer a
...@@ -1960,6 +1979,16 @@ def img_conv_layer(input, ...@@ -1960,6 +1979,16 @@ def img_conv_layer(input,
pieces. First 256/4 = 64 channels will process by first 32 filters. The pieces. First 256/4 = 64 channels will process by first 32 filters. The
rest channels will be processed by rest group of filters. rest channels will be processed by rest group of filters.
The example usage is:
.. code-block:: python
conv = img_conv_layer(input=data, filter_size=1, filter_size_y=1,
num_channels=8,
num_filters=16, stride=1,
bias_attr=False,
act=ReluActivation())
:param name: Layer name. :param name: Layer name.
:type name: basestring :type name: basestring
:param input: Layer Input. :param input: Layer Input.
...@@ -2099,6 +2128,34 @@ def img_pool_layer(input, ...@@ -2099,6 +2128,34 @@ def img_pool_layer(input,
.. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/ .. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/
- ceil_mode=True:
.. math::
w = 1 + int(ceil(input\_width + 2 * padding - pool\_size) / float(stride))
h = 1 + int(ceil(input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y))
- ceil_mode=False:
.. math::
w = 1 + int(floor(input\_width + 2 * padding - pool\_size) / float(stride))
h = 1 + int(floor(input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y))
The example usage is:
.. code-block:: python
maxpool = img_pool_layer(input=conv,
pool_size=3,
pool_size_y=5,
num_channels=8,
stride=1,
stride_y=2,
padding=1,
padding_y=2,
pool_type=MaxPooling())
:param padding: pooling padding width. :param padding: pooling padding width.
:type padding: int :type padding: int
:param padding_y: pooling padding height. It's equal to padding by default. :param padding_y: pooling padding height. It's equal to padding by default.
...@@ -2125,19 +2182,6 @@ def img_pool_layer(input, ...@@ -2125,19 +2182,6 @@ def img_pool_layer(input,
:param ceil_mode: Whether to use ceil mode to calculate output height and width. :param ceil_mode: Whether to use ceil mode to calculate output height and width.
Default is True. If set to False, floor mode is used instead. Default is True. If set to False, floor mode is used instead.
- ceil_mode=True:
.. math::
w = 1 + int(ceil(input_width + 2 * padding - pool_size) / float(stride))
h = 1 + int(ceil(input_height + 2 * padding_y - pool_size_y) / float(stride_y))
- ceil_mode=False:
.. math::
w = 1 + int(floor(input_width + 2 * padding - pool_size) / float(stride))
h = 1 + int(floor(input_height + 2 * padding_y - pool_size_y) / float(stride_y))
:type ceil_mode: bool :type ceil_mode: bool
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -2199,6 +2243,15 @@ def spp_layer(input, ...@@ -2199,6 +2243,15 @@ def spp_layer(input,
The details please refer to The details please refer to
`Kaiming He's paper <https://arxiv.org/abs/1406.4729>`_. `Kaiming He's paper <https://arxiv.org/abs/1406.4729>`_.
The example usage is:
.. code-block:: python
spp = spp_layer(input=data,
pyramid_height=2,
num_channels=16,
pool_type=MaxPooling())
:param name: layer name. :param name: layer name.
:type name: basestring :type name: basestring
:param input: layer's input. :param input: layer's input.
...@@ -2287,6 +2340,12 @@ def img_cmrnorm_layer(input, ...@@ -2287,6 +2340,12 @@ def img_cmrnorm_layer(input,
The details please refer to The details please refer to
`Alex's paper <http://www.cs.toronto.edu/~fritz/absps/imagenet.pdf>`_. `Alex's paper <http://www.cs.toronto.edu/~fritz/absps/imagenet.pdf>`_.
The example usage is:
.. code-block:: python
norm = img_cmrnorm_layer(input=net, size=5)
:param name: layer name. :param name: layer name.
:type name: None|basestring :type name: None|basestring
:param input: layer's input. :param input: layer's input.
...@@ -2342,6 +2401,12 @@ def batch_norm_layer(input, ...@@ -2342,6 +2401,12 @@ def batch_norm_layer(input,
The details of batch normalization please refer to this The details of batch normalization please refer to this
`paper <http://arxiv.org/abs/1502.03167>`_. `paper <http://arxiv.org/abs/1502.03167>`_.
The example usage is:
.. code-block:: python
norm = batch_norm_layer(input=net, act=ReluActivation())
:param name: layer name. :param name: layer name.
:type name: basestring :type name: basestring
:param input: batch normalization input. Better be linear activation. :param input: batch normalization input. Better be linear activation.
...@@ -3905,13 +3970,13 @@ def conv_shift_layer(a, b, name=None, layer_attr=None): ...@@ -3905,13 +3970,13 @@ def conv_shift_layer(a, b, name=None, layer_attr=None):
.. code-block:: python .. code-block:: python
conv_shift = conv_shift_layer(input=[layer1, layer2]) conv_shift = conv_shift_layer(a=layer1, b=layer2)
:param name: layer name :param name: layer name
:type name: basestring :type name: basestring
:param a: Input layer a. :param a: Input layer a.
:type a: LayerOutput :type a: LayerOutput
:param b: input layer b :param b: input layer b.
:type b: LayerOutput :type b: LayerOutput
:param layer_attr: layer's extra attribute. :param layer_attr: layer's extra attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
...@@ -4003,8 +4068,8 @@ def tensor_layer(a, ...@@ -4003,8 +4068,8 @@ def tensor_layer(a,
@wrap_act_default() @wrap_act_default()
@layer_support() @layer_support()
def selective_fc_layer(input, def selective_fc_layer(input,
select,
size, size,
select=None,
act=None, act=None,
name=None, name=None,
pass_generation=False, pass_generation=False,
...@@ -4031,6 +4096,7 @@ def selective_fc_layer(input, ...@@ -4031,6 +4096,7 @@ def selective_fc_layer(input,
:type input: LayerOutput|list|tuple :type input: LayerOutput|list|tuple
:param select: The select layer. The output of select layer should be a :param select: The select layer. The output of select layer should be a
sparse binary matrix, and treat as the mask of selective fc. sparse binary matrix, and treat as the mask of selective fc.
If is None, acts exactly like fc_layer.
:type select: LayerOutput :type select: LayerOutput
:param size: The layer dimension. :param size: The layer dimension.
:type size: int :type size: int
...@@ -4259,7 +4325,7 @@ def block_expand_layer(input, ...@@ -4259,7 +4325,7 @@ def block_expand_layer(input,
.. code-block:: python .. code-block:: python
block_expand = block_expand_layer(input, block_expand = block_expand_layer(input=layer,
num_channels=128, num_channels=128,
stride_x=1, stride_x=1,
stride_y=1, stride_y=1,
...@@ -4463,7 +4529,7 @@ def warp_ctc_layer(input, ...@@ -4463,7 +4529,7 @@ def warp_ctc_layer(input,
- You can set 'blank' to any value ranged in [0, num_classes], which - You can set 'blank' to any value ranged in [0, num_classes], which
should be consistent as that used in your labels. should be consistent as that used in your labels.
- As a native 'softmax' activation is integrated to the warp-ctc library, - As a native 'softmax' activation is integrated to the warp-ctc library,
'linear' activation is expected instead in the 'input' layer. 'linear' activation is expected instead in the 'input' layer.
The simple usage: The simple usage:
...@@ -4596,6 +4662,13 @@ def crf_decoding_layer(input, ...@@ -4596,6 +4662,13 @@ def crf_decoding_layer(input,
this layer will also calculate error. output.value[i] is 1 for incorrect this layer will also calculate error. output.value[i] is 1 for incorrect
decoding or 0 for correct decoding. decoding or 0 for correct decoding.
The simple usage:
.. code-block:: python
crf_decoding = crf_decoding_layer(input=input,
size=label_dim)
:param input: The first input layer. :param input: The first input layer.
:type input: LayerOutput :type input: LayerOutput
:param size: size of this layer. :param size: size of this layer.
......
...@@ -18,13 +18,15 @@ import parameters ...@@ -18,13 +18,15 @@ import parameters
import trainer import trainer
import event import event
import data_type import data_type
import topology
import data_feeder import data_feeder
import attr import attr
import pooling
import py_paddle.swig_paddle as api import py_paddle.swig_paddle as api
__all__ = [ __all__ = [
'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer', 'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer',
'event', 'data_type', 'attr', 'data_feeder' 'event', 'data_type', 'attr', 'pooling', 'data_feeder', 'topology'
] ]
......
...@@ -23,7 +23,7 @@ class DataFeeder(DataProviderConverter): ...@@ -23,7 +23,7 @@ class DataFeeder(DataProviderConverter):
""" """
DataFeeder converts the data returned by paddle.reader into a data structure DataFeeder converts the data returned by paddle.reader into a data structure
of Arguments which is defined in the API. The paddle.reader usually returns of Arguments which is defined in the API. The paddle.reader usually returns
a list of mini-batch data entries. Each data entry in the list is one sampe. a list of mini-batch data entries. Each data entry in the list is one sample.
Each sample is a list or a tuple with one feature or multiple features. Each sample is a list or a tuple with one feature or multiple features.
DataFeeder converts this mini-batch data entries into Arguments in order DataFeeder converts this mini-batch data entries into Arguments in order
to feed it to C++ interface. to feed it to C++ interface.
......
...@@ -13,10 +13,10 @@ ...@@ -13,10 +13,10 @@
# limitations under the License. # limitations under the License.
from paddle.trainer.PyDataProvider2 import \ from paddle.trainer.PyDataProvider2 import \
InputType, dense_vector, sparse_binary_vector,\ InputType, DataType, dense_vector, sparse_binary_vector,\
sparse_vector, integer_value, integer_value_sequence sparse_vector, integer_value, integer_value_sequence
__all__ = [ __all__ = [
'InputType', 'dense_vector', 'sparse_binary_vector', 'sparse_vector', 'InputType', 'DataType', 'dense_vector', 'sparse_binary_vector',
'integer_value', 'integer_value_sequence' 'sparse_vector', 'integer_value', 'integer_value_sequence'
] ]
"""
CIFAR Dataset.
URL: https://www.cs.toronto.edu/~kriz/cifar.html
The default train_creator and test_creator use the CIFAR-10 dataset.
"""
import cPickle
import itertools
import tarfile
import numpy
from config import download
__all__ = [
'cifar_100_train_creator', 'cifar_100_test_creator', 'train_creator',
'test_creator'
]
# Download location and expected MD5 checksum of the CIFAR-10 archive;
# both are handed to download() by train_creator / test_creator.
CIFAR10_URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
CIFAR10_MD5 = 'c58f30108f718f92721af3b95e74349a'
# Download location and expected MD5 checksum of the CIFAR-100 archive;
# both are handed to download() by the cifar_100_* creators.
CIFAR100_URL = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'
CIFAR100_MD5 = 'eb9058c3a382ffc7106e4002c42a8d85'
def __read_batch__(filename, sub_name):
    """
    Build a reader creator over the pickled batches stored in a tar archive.

    :param filename: path of the tar archive on local disk.
    :param sub_name: substring used to select archive members; every member
                     whose name contains it is read.
    :return: a zero-argument callable. Calling it returns a generator that
             yields ``(pixels, label)`` tuples, where ``pixels`` is the
             sample divided by 255.0 and cast to numpy.float32 and ``label``
             is an int.
    """

    def reader():
        def __read_one_batch_impl__(batch):
            data = batch['data']
            # Two key spellings are accepted for the labels; presumably
            # 'labels' is CIFAR-10 and 'fine_labels' is CIFAR-100 -- verify
            # against the dataset documentation.
            labels = batch.get('labels', batch.get('fine_labels', None))
            assert labels is not None
            for sample, label in itertools.izip(data, labels):
                yield (sample / 255.0).astype(numpy.float32), int(label)

        with tarfile.open(filename, mode='r') as f:
            # Lazily pick the members whose name contains sub_name.
            names = (each_item.name for each_item in f
                     if sub_name in each_item.name)

            for name in names:
                # NOTE(review): unpickling executes arbitrary code if the
                # archive is malicious; this relies on the archive having
                # been checksum-verified before it reaches this function.
                batch = cPickle.load(f.extractfile(name))
                for item in __read_one_batch_impl__(batch):
                    yield item

    return reader
def cifar_100_train_creator():
    """
    Train reader creator for the CIFAR-100 dataset.

    Fetches the CIFAR-100 archive through download() and reads the archive
    members whose name contains 'train'.
    """
    fn = download(url=CIFAR100_URL, md5=CIFAR100_MD5)
    return __read_batch__(fn, 'train')
def cifar_100_test_creator():
    """
    Test reader creator for the CIFAR-100 dataset.

    Fetches the CIFAR-100 archive through download() and reads the archive
    members whose name contains 'test'.
    """
    fn = download(url=CIFAR100_URL, md5=CIFAR100_MD5)
    return __read_batch__(fn, 'test')
def train_creator():
    """
    Default train reader creator. Use CIFAR-10 dataset.

    Fetches the CIFAR-10 archive through download() and reads the archive
    members whose name contains 'data_batch'.
    """
    fn = download(url=CIFAR10_URL, md5=CIFAR10_MD5)
    return __read_batch__(fn, 'data_batch')
def test_creator():
    """
    Default test reader creator. Use CIFAR-10 dataset.

    Fetches the CIFAR-10 archive through download() and reads the archive
    members whose name contains 'test_batch'.
    """
    fn = download(url=CIFAR10_URL, md5=CIFAR10_MD5)
    return __read_batch__(fn, 'test_batch')
def unittest():
    """
    Smoke test: iterate the default train and test readers to the end.

    Nothing is checked beyond the readers running to completion without
    raising.
    """
    for _ in train_creator()():
        pass
    for _ in test_creator()():
        pass


if __name__ == '__main__':
    unittest()
import hashlib
import os import os
import shutil
import urllib2
__all__ = ['DATA_HOME'] __all__ = ['DATA_HOME', 'download']
DATA_HOME = os.path.expanduser('~/.cache/paddle_data_set') DATA_HOME = os.path.expanduser('~/.cache/paddle_data_set')
if not os.path.exists(DATA_HOME): if not os.path.exists(DATA_HOME):
os.makedirs(DATA_HOME) os.makedirs(DATA_HOME)
def download(url, md5, retry_limit=3):
    """
    Fetch ``url`` into the local data cache and return the cached file path.

    The file is stored as ``DATA_HOME/<md5>/<last path component of url>``.
    If a file is already there and its MD5 digest equals ``md5`` it is
    returned without touching the network.

    :param url: address of the file to download.
    :param md5: expected hexadecimal MD5 digest of the file; it also names
                the cache sub-directory.
    :param retry_limit: maximum number of download attempts before giving
                        up. Previously the function retried forever when the
                        digest never matched.
    :return: path of the verified local file.
    :raises RuntimeError: if the file still fails verification after
                          ``retry_limit`` download attempts.
    """
    # Imported locally so the module's import block does not have to change.
    import contextlib

    filename = os.path.split(url)[-1]
    assert DATA_HOME is not None
    filepath = os.path.join(DATA_HOME, md5)
    if not os.path.exists(filepath):
        os.makedirs(filepath)
    __full_file__ = os.path.join(filepath, filename)

    def __file_ok__():
        # A missing file and a digest mismatch are both "not ok".
        if not os.path.exists(__full_file__):
            return False
        md5_hash = hashlib.md5()
        with open(__full_file__, 'rb') as f:
            # Hash in 4 KiB chunks so the whole file is never held in memory.
            for chunk in iter(lambda: f.read(4096), b""):
                md5_hash.update(chunk)
        return md5_hash.hexdigest() == md5

    attempts = 0
    while not __file_ok__():
        if attempts >= retry_limit:
            raise RuntimeError(
                'Cannot download %s within retry limit %d' %
                (url, retry_limit))
        attempts += 1
        # closing() guarantees the HTTP response is released even when the
        # copy fails part-way through.
        with contextlib.closing(urllib2.urlopen(url)) as response:
            with open(__full_file__, mode='wb') as of:
                shutil.copyfileobj(fsrc=response, fdst=of)
    return __full_file__
import zipfile
from config import download
import re
import random
import functools
__all__ = ['train_creator', 'test_creator']
class MovieInfo(object):
    """
    Metadata of one movie: integer id, list of category names and title.
    """

    def __init__(self, index, categories, title):
        """
        :param index: movie id; converted with int().
        :param categories: list of category names.
        :param title: movie title string.
        """
        self.index = int(index)
        self.categories = categories
        self.title = title

    def value(self):
        """
        Encode the movie as ``[index, category_ids, title_word_ids]``.

        Category names are looked up in CATEGORIES_DICT and the lower-cased,
        whitespace-split title words in MOVIE_TITLE_DICT, so both module
        level dictionaries must be populated before this is called.
        """
        category_ids = [CATEGORIES_DICT[c] for c in self.categories]
        title_ids = [MOVIE_TITLE_DICT[w.lower()] for w in self.title.split()]
        return [self.index, category_ids, title_ids]
class UserInfo(object):
    """
    Metadata of one user: integer id, gender flag, age bucket and job id.
    """

    # Accepted raw age codes; a user's ``age`` attribute is the position of
    # the raw code in this list.
    AGE_CODES = [1, 18, 25, 35, 45, 50, 56]

    def __init__(self, index, gender, age, job_id):
        """
        :param index: user id; converted with int().
        :param gender: gender code; 'M' is treated as male, anything else
                       as not male.
        :param age: raw age code; must be one of AGE_CODES after int()
                    conversion, otherwise ValueError is raised.
        :param job_id: job id; converted with int().
        """
        self.index = int(index)
        self.is_male = gender == 'M'
        self.age = self.AGE_CODES.index(int(age))
        self.job_id = int(job_id)

    def value(self):
        """
        Encode the user as ``[index, gender_id, age_bucket, job_id]`` where
        gender_id is 0 for male and 1 otherwise.
        """
        gender_id = 0 if self.is_male else 1
        return [self.index, gender_id, self.age, self.job_id]
# Module-level caches; all start empty and are filled in by
# __initialize_meta_info__.
MOVIE_INFO = None  # int movie id -> MovieInfo
MOVIE_TITLE_DICT = None  # lower-cased title word -> integer id
CATEGORIES_DICT = None  # category name -> integer id
USER_INFO = None  # int user id -> UserInfo
def __initialize_meta_info__():
    """
    Download the MovieLens ml-1m archive and populate the module caches.

    On the first successful call this parses ``ml-1m/movies.dat`` and
    ``ml-1m/users.dat`` from the archive and fills MOVIE_INFO,
    MOVIE_TITLE_DICT, CATEGORIES_DICT and USER_INFO. Later calls only
    resolve the archive path again.

    The caches are published together once parsing has finished, so a
    failure part-way through leaves MOVIE_INFO as None and the next call
    starts over instead of running with half-filled caches.

    :return: local path of the downloaded archive.
    """
    global MOVIE_INFO, MOVIE_TITLE_DICT, CATEGORIES_DICT, USER_INFO

    fn = download(
        url='http://files.grouplens.org/datasets/movielens/ml-1m.zip',
        md5='c4d9eecfca2ab87c1945afe126590906')
    if MOVIE_INFO is not None:
        return fn

    # Splits a title into the text before a trailing "(digits)" group and
    # the digits themselves; only the text part is kept.
    pattern = re.compile(r'^(.*)\((\d+)\)$')
    movie_info = dict()
    user_info = dict()
    title_word_set = set()
    categories_set = set()

    with zipfile.ZipFile(file=fn) as package:
        with package.open('ml-1m/movies.dat') as movie_file:
            for line in movie_file:
                movie_id, title, categories = line.strip().split('::')
                categories = categories.split('|')
                for c in categories:
                    categories_set.add(c)
                title = pattern.match(title).group(1)
                movie_info[int(movie_id)] = MovieInfo(
                    index=movie_id, categories=categories, title=title)
                for w in title.split():
                    title_word_set.add(w.lower())

        with package.open('ml-1m/users.dat') as user_file:
            for line in user_file:
                uid, gender, age, job, _ = line.strip().split("::")
                user_info[int(uid)] = UserInfo(
                    index=uid, gender=gender, age=age, job_id=job)

    # Give every distinct title word and category a dense integer id.
    MOVIE_TITLE_DICT = dict()
    for i, w in enumerate(title_word_set):
        MOVIE_TITLE_DICT[w] = i
    CATEGORIES_DICT = dict()
    for i, c in enumerate(categories_set):
        CATEGORIES_DICT[c] = i
    USER_INFO = user_info
    # Assigned last: MOVIE_INFO doubles as the "already initialized" flag.
    MOVIE_INFO = movie_info
    return fn
def __reader__(rand_seed=0, test_ratio=0.1, is_test=False):
    """
    Generate samples from ``ml-1m/ratings.dat``.

    Each rating line is assigned to the test split when a draw from a
    random generator seeded with ``rand_seed`` is below ``test_ratio``, and
    to the train split otherwise; the same seed therefore reproduces the
    same split.

    :param rand_seed: seed of the random generator that decides the split.
    :param test_ratio: threshold compared against each random draw.
    :param is_test: yield the test split when True, the train split
                    otherwise.
    :return: generator yielding ``usr.value() + mov.value() + [[score]]``
             where ``score`` is ``rating * 2 - 5.0`` (a rating of 1 becomes
             -3.0, a rating of 5 becomes 5.0).
    """
    fn = __initialize_meta_info__()
    rand = random.Random(x=rand_seed)
    with zipfile.ZipFile(file=fn) as package:
        # Named rating_file so the per-line rating value below no longer
        # rebinds the name of the open file handle.
        with package.open('ml-1m/ratings.dat') as rating_file:
            for line in rating_file:
                if (rand.random() < test_ratio) == is_test:
                    uid, mov_id, rating, _ = line.strip().split("::")
                    uid = int(uid)
                    mov_id = int(mov_id)
                    score = float(rating) * 2 - 5.0

                    mov = MOVIE_INFO[mov_id]
                    usr = USER_INFO[uid]
                    yield usr.value() + mov.value() + [[score]]
def __reader_creator__(**kwargs):
    """
    Return a zero-argument callable that starts a fresh __reader__
    generator with the given keyword arguments each time it is called.
    """
    return lambda: __reader__(**kwargs)
# Public reader creators: __reader_creator__ with is_test pre-bound, so the
# remaining __reader__ keyword arguments can still be passed by the caller.
train_creator = functools.partial(__reader_creator__, is_test=False)
test_creator = functools.partial(__reader_creator__, is_test=True)
def unittest():
for train_count, _ in enumerate(train_creator()()):
pass
for test_count, _ in enumerate(test_creator()()):
pass
print train_count, test_count
if __name__ == '__main__':
unittest()
...@@ -82,10 +82,17 @@ import activation ...@@ -82,10 +82,17 @@ import activation
import attr import attr
__all__ = [ __all__ = [
'parse_network', 'data', 'fc', 'max_id', 'classification_cost', 'parse_network', 'data', 'fc', 'conv_shift', 'img_conv', 'img_pool', 'spp',
'cross_entropy_cost', 'cross_entropy_with_selfnorm_cost', 'regression_cost', 'maxout', 'img_cmrnorm', 'batch_norm', 'sum_to_one_norm', 'recurrent',
'lstmemory', 'grumemory', 'pool', 'last_seq', 'first_seq', 'concat',
'seq_concat', 'block_expand', 'expand', 'repeat', 'seq_reshape', 'addto',
'linear_comb', 'interpolation', 'bilinear_interp', 'power', 'scaling',
'slope_intercept', 'tensor', 'cos_sim', 'trans', 'max_id', 'sampling_id',
'pad', 'classification_cost', 'cross_entropy_cost',
'cross_entropy_with_selfnorm_cost', 'regression_cost',
'multi_binary_label_cross_entropy_cost', 'rank_cost', 'lambda_cost', 'multi_binary_label_cross_entropy_cost', 'rank_cost', 'lambda_cost',
'sum_cost', 'huber_cost' 'sum_cost', 'huber_cost', 'crf', 'crf_decoding', 'ctc', 'warp_ctc', 'nce',
'hsigmoid', 'eos'
] ]
__projection_names__ = filter(lambda x: x.endswith('_projection'), __projection_names__ = filter(lambda x: x.endswith('_projection'),
...@@ -143,9 +150,9 @@ class Layer(object): ...@@ -143,9 +150,9 @@ class Layer(object):
raise NotImplementedError() raise NotImplementedError()
def __convert_to_v2__(method_name, name_prefix=None, parent_names=None): def __convert_to_v2__(method_name, parent_names, is_default_name=True):
if name_prefix is not None: if is_default_name:
wrapper = wrap_name_default(name_prefix=name_prefix) wrapper = wrap_name_default(name_prefix=method_name)
else: else:
wrapper = None wrapper = None
...@@ -277,44 +284,93 @@ def mixed(size=0, ...@@ -277,44 +284,93 @@ def mixed(size=0,
return MixedLayerV2(size, input, name, act, bias_attr, layer_attr) return MixedLayerV2(size, input, name, act, bias_attr, layer_attr)
LayerV2 = Layer
data = DataLayerV2 data = DataLayerV2
fc = __convert_to_v2__('fc_layer', name_prefix='fc', parent_names=['input']) AggregateLevel = conf_helps.layers.AggregateLevel
max_id = __convert_to_v2__( ExpandLevel = conf_helps.layers.ExpandLevel
'maxid_layer', name_prefix='maxid', parent_names=['input'])
classification_cost = __convert_to_v2__( layer_list = [
'classification_cost', # [V2LayerImpl, V1_method_name, parent_names]
name_prefix='classification_cost', # fully connected layers
parent_names=['input', 'label', 'weight']) ['fc', 'fc_layer', ['input']],
regression_cost = __convert_to_v2__( # conv layers
'regression_cost', ['conv_shift', 'conv_shift_layer', ['a', 'b']],
name_prefix='regression_cost', ['img_conv', 'img_conv_layer', ['input']],
parent_names=['input', 'label', 'weight']) # image pooling layers
cross_entropy_cost = __convert_to_v2__( ['img_pool', 'img_pool_layer', ['input']],
'cross_entropy', ['spp', 'spp_layer', ['input']],
name_prefix='cross_entropy', ['maxout', 'maxout_layer', ['input']],
parent_names=['input', 'label']) # norm layers
cross_entropy_with_selfnorm_cost = __convert_to_v2__( ['img_cmrnorm', 'img_cmrnorm_layer', ['input']],
'cross_entropy_with_selfnorm', ['batch_norm', 'batch_norm_layer', ['input']],
name_prefix='cross_entropy_with_selfnorm', ['sum_to_one_norm', 'sum_to_one_norm_layer', ['input']],
parent_names=['input', 'label']) # recurrent layers
multi_binary_label_cross_entropy_cost = __convert_to_v2__( ['recurrent', 'recurrent_layer', ['input']],
'multi_binary_label_cross_entropy', ['lstmemory', 'lstmemory', ['input']],
name_prefix='multi_binary_label_cross_entropy', ['grumemory', 'grumemory', ['input']],
parent_names=['input', 'label']) # aggregate layers
rank_cost = __convert_to_v2__( ['pool', 'pooling_layer', ['input']],
'rank_cost', ['last_seq', 'last_seq', ['input']],
name_prefix='rank_cost', ['first_seq', 'first_seq', ['input']],
parent_names=['left', 'right', 'label', 'weight']) ['concat', 'concat_layer', ['input']],
lambda_cost = __convert_to_v2__( ['seq_concat', 'seq_concat_layer', ['a', 'b']],
'lambda_cost', name_prefix='lambda_cost', parent_names=['input', 'score']) # reshaping layers
sum_cost = __convert_to_v2__( ['block_expand', 'block_expand_layer', ['input']],
'sum_cost', name_prefix='sum_cost', parent_names=['input']) ['expand', 'expand_layer', ['input', 'expand_as']],
huber_cost = __convert_to_v2__( ['repeat', 'repeat_layer', ['input']],
'huber_cost', name_prefix='huber_cost', parent_names=['input', 'label']) ['rotate', 'rotate_layer', ['input']],
['seq_reshape', 'seq_reshape_layer', ['input']],
# math layers
['addto', 'addto_layer', ['input']],
['linear_comb', 'linear_comb_layer', ['weights', 'vectors']],
['interpolation', 'interpolation_layer', ['input', 'weight']],
['bilinear_interp', 'bilinear_interp_layer', ['input']],
['power', 'power_layer', ['input', 'weight']],
['scaling', 'scaling_layer', ['input', 'weight']],
['slope_intercept', 'slope_intercept_layer', ['input']],
['tensor', 'tensor_layer', ['a', 'b']],
['cos_sim', 'cos_sim', ['a', 'b']],
['trans', 'trans_layer', ['input']],
# sampling layers
['max_id', 'maxid_layer', ['input']],
['sampling_id', 'sampling_id_layer', ['input']],
# slicing and joining layers
['pad', 'pad_layer', ['input']],
# cost layers
[
'classification_cost', 'classification_cost',
['input', 'label', 'weight']
],
['regression_cost', 'regression_cost', ['input', 'label', 'weight']],
['cross_entropy_cost', 'cross_entropy', ['input', 'label']],
[
'cross_entropy_with_selfnorm_cost', 'cross_entropy_with_selfnorm',
['input', 'label']
],
[
'multi_binary_label_cross_entropy_cost',
'multi_binary_label_cross_entropy', ['input', 'label']
],
['rank_cost', 'rank_cost', ['left', 'right', 'label', 'weight']],
['lambda_cost', 'lambda_cost', ['input', 'score']],
['sum_cost', 'sum_cost', ['input']],
['huber_cost', 'huber_cost', ['input', 'label']],
['crf', 'crf_layer', ['input', 'label']],
['crf_decoding', 'crf_decoding_layer', ['input']],
['ctc', 'ctc_layer', ['input', 'label']],
['warp_ctc', 'warp_ctc_layer', ['input', 'label']],
['nce', 'nce_layer', ['input', 'label']],
['hsigmoid', 'hsigmoid', ['input', 'label']],
# check layers
['eos', 'eos_layer', ['input']]
]
for l in layer_list:
globals()[l[0]] = __convert_to_v2__(l[1], l[2])
# convert projection # convert projection
for prj in __projection_names__: for prj in __projection_names__:
globals()[prj] = __convert_to_v2__(prj, parent_names=['input']) globals()[prj] = __convert_to_v2__(
prj, parent_names=['input'], is_default_name=False)
# convert operator # convert operator
operator_list = [ operator_list = [
...@@ -323,4 +379,5 @@ operator_list = [ ...@@ -323,4 +379,5 @@ operator_list = [
['conv_operator', ['img', 'filter']] ['conv_operator', ['img', 'filter']]
] ]
for op in operator_list: for op in operator_list:
globals()[op[0]] = __convert_to_v2__(op[0], parent_names=op[1]) globals()[op[0]] = __convert_to_v2__(
op[0], parent_names=op[1], is_default_name=False)
import numpy as np import numpy as np
from . import layer as v2_layer
import py_paddle.swig_paddle as api import py_paddle.swig_paddle as api
from paddle.proto.ParameterConfig_pb2 import ParameterConfig from paddle.proto.ParameterConfig_pb2 import ParameterConfig
from topology import Topology
__all__ = ['Parameters', 'create'] __all__ = ['Parameters', 'create']
def create(*layers): def create(layers):
""" """
Create parameter pool by layers. In paddle, layer can be represent a Create parameter pool by topology.
model config.
:param layers: :param layers:
:return: :return:
""" """
for layer in layers: topology = Topology(layers)
if not isinstance(layer, v2_layer.Layer):
raise ValueError(
'create must pass a topologies which type is paddle.layer.Layer')
model_config = v2_layer.parse_network(*layers)
pool = Parameters() pool = Parameters()
for param in model_config.parameters: for param in topology.proto().parameters:
pool.__append_config__(param) pool.__append_config__(param)
return pool return pool
...@@ -224,7 +219,8 @@ class Parameters(object): ...@@ -224,7 +219,8 @@ class Parameters(object):
except ValueError: except ValueError:
# If no such parameter in gradient machine, then don't copy # If no such parameter in gradient machine, then don't copy
pass pass
self.__gradient_machines__.append(gradient_machine)
self.__gradient_machines__.append(gradient_machine)
def __get_parameter_in_gradient_machine__(gradient_machine, name): def __get_parameter_in_gradient_machine__(gradient_machine, name):
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers.poolings import *
# Short public names for the pooling classes brought in by the star import
# from paddle.trainer_config_helpers.poolings; only these aliases are
# exported.
__all__ = ["Max", "CudnnMax", "Avg", "CudnnAvg", "Sum", "SquareRootN"]
Max = MaxPooling
CudnnMax = CudnnMaxPooling
Avg = AvgPooling
CudnnAvg = CudnnAvgPooling
Sum = SumPooling
SquareRootN = SquareRootNPooling
...@@ -2,5 +2,11 @@ add_test(NAME test_v2_layer ...@@ -2,5 +2,11 @@ add_test(NAME test_v2_layer
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_layer.py ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_layer.py
WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
add_test(NAME test_v2_api add_test(NAME test_v2_api
COMMAND bash ${PROJ_ROOT}/python/paddle/v2/tests/run_tests.sh ${PYTHON_EXECUTABLE}) COMMAND bash ${PROJ_ROOT}/python/paddle/v2/tests/run_tests.sh ${PYTHON_EXECUTABLE})
add_test(NAME topology_test
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_topology.py
WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
...@@ -19,16 +19,106 @@ import paddle.v2.activation as activation ...@@ -19,16 +19,106 @@ import paddle.v2.activation as activation
import paddle.v2.attr as attr import paddle.v2.attr as attr
import paddle.v2.data_type as data_type import paddle.v2.data_type as data_type
import paddle.v2.layer as layer import paddle.v2.layer as layer
import paddle.v2.pooling as pooling
from paddle.trainer_config_helpers.config_parser_utils import \
parse_network_config as parse_network
pixel = layer.data(name='pixel', type=data_type.dense_vector(784)) pixel = layer.data(name='pixel', type=data_type.dense_vector(128))
label = layer.data(name='label', type=data_type.integer_value(10)) label = layer.data(name='label', type=data_type.integer_value(10))
weight = layer.data(name='weight', type=data_type.dense_vector(10)) weight = layer.data(name='weight', type=data_type.dense_vector(10))
score = layer.data(name='score', type=data_type.dense_vector(1)) score = layer.data(name='score', type=data_type.dense_vector(1))
hidden = layer.fc(input=pixel, hidden = layer.fc(input=pixel,
size=100, size=100,
act=activation.Sigmoid(), act=activation.Sigmoid(),
param_attr=attr.Param(name='hidden')) param_attr=attr.Param(name='hidden'))
inference = layer.fc(input=hidden, size=10, act=activation.Softmax()) inference = layer.fc(input=hidden, size=10, act=activation.Softmax())
# Module-level 1x1 convolution over the `pixel` data layer, declared with
# 8 input channels and 16 output filters and a linear activation. It is shared
# by several test cases in this file as their image-shaped input.
# NOTE(review): assumes the `pixel` dimension is divisible into 8 channels --
# confirm against the `pixel` definition above.
conv = layer.img_conv(
    input=pixel,
    filter_size=1,
    filter_size_y=1,
    num_channels=8,
    num_filters=16,
    act=activation.Linear())
class ImageLayerTest(unittest.TestCase):
    """Smoke tests for image layers.

    Each test only builds a few layers and prints the result of
    ``layer.parse_network``; there are no assertions on the output.
    """

    def test_conv_layer(self):
        """Parse the shared convolution together with a conv_shift layer."""
        shifted = layer.conv_shift(a=pixel, b=score)
        network = layer.parse_network(conv, shifted)
        print(network)

    def test_pooling_layer(self):
        """Parse image pooling, spatial pyramid pooling and maxout layers."""
        max_pooled = layer.img_pool(
            input=conv,
            pool_size=2,
            num_channels=16,
            padding=1,
            pool_type=pooling.Max())
        pyramid = layer.spp(
            input=conv,
            pyramid_height=2,
            num_channels=16,
            pool_type=pooling.Max())
        max_out = layer.maxout(input=conv, num_channels=16, groups=4)
        network = layer.parse_network(max_pooled, pyramid, max_out)
        print(network)

    def test_norm_layer(self):
        """Parse the three normalisation layers built on top of ``conv``."""
        cmr_norm = layer.img_cmrnorm(input=conv, size=5)
        batch_norm = layer.batch_norm(input=conv)
        sum_norm = layer.sum_to_one_norm(input=conv)
        network = layer.parse_network(cmr_norm, batch_norm, sum_norm)
        print(network)
class AggregateLayerTest(unittest.TestCase):
    """Smoke test for aggregation and concatenation layers (no assertions)."""

    def test_aggregate_layer(self):
        """Build pool/last/first/concat layers on ``pixel`` and parse them."""
        avg_pool = layer.pool(
            input=pixel,
            pooling_type=pooling.Avg(),
            agg_level=layer.AggregateLevel.EACH_SEQUENCE)
        tail = layer.last_seq(input=pixel)
        head = layer.first_seq(input=pixel)
        joined = layer.concat(input=[tail, head])
        seq_joined = layer.seq_concat(a=tail, b=head)
        # Same positional order as the layers were created in.
        outputs = (avg_pool, tail, head, joined, seq_joined)
        print(layer.parse_network(*outputs))
class MathLayerTest(unittest.TestCase):
    """Smoke test for arithmetic-style layers (no assertions)."""

    def test_math_layer(self):
        """Build a set of math layers and print the parsed network."""
        added = layer.addto(input=[pixel, pixel])
        combined = layer.linear_comb(weights=weight, vectors=hidden, size=10)
        interpolated = layer.interpolation(
            input=[hidden, hidden], weight=score)
        # NOTE(review): this layer is constructed but never handed to
        # parse_network below, so it is not part of the parsed network --
        # confirm whether the omission is intentional.
        bilinear = layer.bilinear_interp(
            input=conv, out_size_x=4, out_size_y=4)
        powered = layer.power(input=pixel, weight=score)
        scaled = layer.scaling(input=pixel, weight=score)
        sloped = layer.slope_intercept(input=pixel)
        tensor_out = layer.tensor(a=pixel, b=pixel, size=1000)
        cosine = layer.cos_sim(a=pixel, b=pixel)
        transposed = layer.trans(input=tensor_out)
        outputs = (added, combined, interpolated, powered, scaled, sloped,
                   tensor_out, cosine, transposed)
        print(layer.parse_network(*outputs))
class ReshapeLayerTest(unittest.TestCase):
    """Smoke test for layers that reshape or expand input (no assertions)."""

    def test_reshape_layer(self):
        """Build block_expand/expand/repeat/seq_reshape/rotate and parse them."""
        blocks = layer.block_expand(
            input=conv, num_channels=4, stride_x=1, block_x=1)
        expanded = layer.expand(
            input=weight,
            expand_as=pixel,
            expand_level=layer.ExpandLevel.FROM_TIMESTEP)
        repeated = layer.repeat(input=pixel, num_repeats=4)
        reshaped = layer.seq_reshape(input=pixel, reshape_size=4)
        rotated = layer.rotate(input=pixel, height=16, width=49)
        outputs = (blocks, expanded, repeated, reshaped, rotated)
        print(layer.parse_network(*outputs))
class RecurrentLayerTest(unittest.TestCase):
    """Smoke test for recurrent layers (no assertions)."""

    def test_recurrent_layer(self):
        """Feed one integer data layer to recurrent, LSTM and GRU layers."""
        word = layer.data(name='word', type=data_type.integer_value(12))
        simple_rnn = layer.recurrent(input=word)
        lstm_out = layer.lstmemory(input=word)
        gru_out = layer.grumemory(input=word)
        network = layer.parse_network(simple_rnn, lstm_out, gru_out)
        print(network)
class CostLayerTest(unittest.TestCase): class CostLayerTest(unittest.TestCase):
...@@ -49,13 +139,35 @@ class CostLayerTest(unittest.TestCase): ...@@ -49,13 +139,35 @@ class CostLayerTest(unittest.TestCase):
cost10 = layer.sum_cost(input=inference) cost10 = layer.sum_cost(input=inference)
cost11 = layer.huber_cost(input=score, label=label) cost11 = layer.huber_cost(input=score, label=label)
print dir(layer) print layer.parse_network(cost1, cost2)
layer.parse_network(cost1, cost2) print layer.parse_network(cost3, cost4)
print dir(layer) print layer.parse_network(cost5, cost6)
#print layer.parse_network(cost3, cost4) print layer.parse_network(cost7, cost8, cost9, cost10, cost11)
#print layer.parse_network(cost5, cost6)
#print layer.parse_network(cost7, cost8, cost9, cost10, cost11) crf = layer.crf(input=inference, label=label)
crf_decoding = layer.crf_decoding(input=inference, size=3)
ctc = layer.ctc(input=inference, label=label)
warp_ctc = layer.warp_ctc(input=pixel, label=label)
nce = layer.nce(input=inference, label=label, num_classes=3)
hsigmoid = layer.hsigmoid(input=inference, label=label, num_classes=3)
print layer.parse_network(crf, crf_decoding, ctc, warp_ctc, nce,
hsigmoid)
class OtherLayerTest(unittest.TestCase):
    """Smoke tests for sampling and padding layers (no assertions)."""

    def test_sampling_layer(self):
        """Parse max_id, sampling_id and an eos layer fed by max_id."""
        best_id = layer.max_id(input=inference)
        sampled = layer.sampling_id(input=inference)
        end_mark = layer.eos(input=best_id, eos_id=5)
        network = layer.parse_network(best_id, sampled, end_mark)
        print(network)

    def test_slicing_joining_layer(self):
        """Parse a pad layer applied to the shared convolution output."""
        padded = layer.pad(
            input=conv, pad_c=[2, 3], pad_h=[1, 2], pad_w=[3, 1])
        print(layer.parse_network(padded))
class ProjOpTest(unittest.TestCase):
def test_projection(self): def test_projection(self):
input = layer.data(name='data', type=data_type.dense_vector(784)) input = layer.data(name='data', type=data_type.dense_vector(784))
word = layer.data( word = layer.data(
......
# Copyright PaddlePaddle contributors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import paddle.v2.layer as layer
import paddle.v2.topology as topology
import paddle.v2.data_type as data_type
import paddle.trainer_config_helpers as conf_helps
class TestTopology(unittest.TestCase):
    """Unit tests for ``paddle.v2.topology.Topology``."""

    @staticmethod
    def _build_classifier():
        """Build the small network shared by every test in this class.

        The network is ``pixel`` (dense, 784) -> fc(100, sigmoid) ->
        fc(10, softmax), plus an integer ``label`` data layer.

        :return: tuple ``(pixel, label, inference)``.
        """
        pixel = layer.data(name='pixel', type=data_type.dense_vector(784))
        label = layer.data(name='label', type=data_type.integer_value(10))
        hidden = layer.fc(input=pixel,
                          size=100,
                          act=conf_helps.SigmoidActivation())
        inference = layer.fc(input=hidden,
                             size=10,
                             act=conf_helps.SoftmaxActivation())
        return pixel, label, inference

    def test_data_type(self):
        """``data_type()`` reports one (name, type) pair per data layer."""
        _, label, inference = self._build_classifier()
        cost = layer.classification_cost(input=inference, label=label)
        topo = topology.Topology(cost)
        data_types = topo.data_type()
        self.assertEqual(len(data_types), 2)

        # List comprehensions instead of filter(lambda type: ...): they do not
        # shadow the builtin `type`, and len() works on the result even where
        # filter() returns an iterator rather than a list.
        pixel_data_type = [
            entry for entry in data_types if entry[0] == "pixel"
        ]
        self.assertEqual(len(pixel_data_type), 1)
        pixel_data_type = pixel_data_type[0]
        self.assertEqual(pixel_data_type[1].type, data_type.DataType.Dense)
        self.assertEqual(pixel_data_type[1].dim, 784)

        label_data_type = [
            entry for entry in data_types if entry[0] == "label"
        ]
        self.assertEqual(len(label_data_type), 1)
        label_data_type = label_data_type[0]
        self.assertEqual(label_data_type[1].type, data_type.DataType.Index)
        self.assertEqual(label_data_type[1].dim, 10)

    def test_get_layer(self):
        """``get_layer(name)`` returns the layer object created with that name."""
        pixel, label, inference = self._build_classifier()
        cost = layer.classification_cost(input=inference, label=label)
        topo = topology.Topology(cost)
        pixel_layer = topo.get_layer("pixel")
        label_layer = topo.get_layer("label")
        self.assertEqual(pixel_layer, pixel)
        self.assertEqual(label_layer, label)

    def test_parse(self):
        """``Topology`` accepts a single layer as well as lists of layers."""
        _, label, inference = self._build_classifier()
        maxid = layer.max_id(input=inference)
        cost1 = layer.classification_cost(input=inference, label=label)
        cost2 = layer.cross_entropy_cost(input=inference, label=label)

        # Only checks that building the topology and calling proto() succeed.
        topology.Topology(cost2).proto()
        topology.Topology([cost1]).proto()
        topology.Topology([cost1, cost2]).proto()
        topology.Topology([inference, maxid]).proto()


if __name__ == '__main__':
    unittest.main()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
from paddle.proto.ModelConfig_pb2 import ModelConfig
import layer as v2_layer
__all__ = ['Topology']
class Topology(object):
    """
    Topology is used to store the information about all layers
    and network configs.
    """

    def __init__(self, layers):
        """
        Validate the given output layer(s) and parse them into a ModelConfig.

        :param layers: a single v2 layer, or a sequence of v2 layers.
        :raise ValueError: raised by ``__check_layer_type__`` when an item is
                           not a v2 layer.
        """
        # NOTE(review): `collections.Sequence` is the Python 2 location of
        # this ABC; newer Python 3 releases only provide it as
        # `collections.abc.Sequence`.
        if not isinstance(layers, collections.Sequence):
            layers = [layers]

        # One pass validates both the wrapped single layer and every item of
        # a caller-supplied sequence.
        for each_layer in layers:
            __check_layer_type__(each_layer)

        self.layers = layers
        self.__model_config__ = v2_layer.parse_network(*layers)
        assert isinstance(self.__model_config__, ModelConfig)

    def _iter_layers(self):
        """
        Yield every layer reachable from ``self.layers`` exactly once.

        Layers are produced in depth-first pre-order, following
        ``__parent_layers__`` from each output layer. The walk is iterative
        and remembers visited layers by identity, so shared ancestors are not
        walked again and deep networks do not hit the recursion limit.
        """
        visited = set()
        # Reversed so that pop() handles the layers in their original order.
        pending = list(reversed(self.layers))
        while pending:
            current = pending.pop()
            if id(current) in visited:
                continue
            visited.add(id(current))
            yield current
            parents = list(current.__parent_layers__.values())
            pending.extend(reversed(parents))

    def proto(self):
        """
        :return: the ModelConfig produced by ``parse_network`` in ``__init__``.
        """
        return self.__model_config__

    def get_layer(self, name):
        """
        get v2.Layer Class instance by layer name

        :param name: name of the layer to look up.
        :return: the first layer with that name in depth-first order.
        :raise AssertionError: if no reachable layer has that name.
        """
        for candidate in self._iter_layers():
            if candidate.name == name:
                return candidate
        # Raised explicitly so that the failure type stays AssertionError and
        # the check is not stripped when Python runs with -O.
        raise AssertionError('no layer named %r in this topology' % (name, ))

    def data_layers(self):
        """
        get all data layer

        :return: set of every reachable ``DataLayerV2`` instance.
        """
        return set(each_layer for each_layer in self._iter_layers()
                   if isinstance(each_layer, v2_layer.DataLayerV2))

    def data_type(self):
        """
        get data_type from proto, such as:
        [('image', dense_vector(768)), ('label', integer_value(10))]
        """
        return [(data_layer.name, data_layer.type)
                for data_layer in self.data_layers()]
def __check_layer_type__(layer):
    """
    Ensure ``layer`` is an instance of ``v2_layer.LayerV2``.

    :param layer: object to validate.
    :raise ValueError: if ``layer`` is not a ``LayerV2`` instance.
    """
    if isinstance(layer, v2_layer.LayerV2):
        return
    raise ValueError('layer should have type paddle.layer.Layer')
import collections import collections
import py_paddle.swig_paddle as api import py_paddle.swig_paddle as api
from paddle.proto.ModelConfig_pb2 import ModelConfig
from data_feeder import DataFeeder
from data_feeder import DataFeeder
from topology import Topology
from . import event as v2_event from . import event as v2_event
from . import layer as v2_layer
from . import optimizer as v2_optimizer from . import optimizer as v2_optimizer
from . import parameters as v2_parameters from . import parameters as v2_parameters
...@@ -30,7 +29,7 @@ class ITrainer(object): ...@@ -30,7 +29,7 @@ class ITrainer(object):
def train(self, def train(self,
train_data_reader, train_data_reader,
topology, cost,
parameters, parameters,
test_data_reader=None, test_data_reader=None,
event_handler=None): event_handler=None):
...@@ -38,7 +37,7 @@ class ITrainer(object): ...@@ -38,7 +37,7 @@ class ITrainer(object):
train method. train method.
:param train_data_reader: :param train_data_reader:
:param topology: :param cost:
:param parameters: :param parameters:
:param test_data_reader: :param test_data_reader:
:param event_handler: :param event_handler:
...@@ -63,19 +62,18 @@ class SGD(ITrainer): ...@@ -63,19 +62,18 @@ class SGD(ITrainer):
def train(self, def train(self,
train_data_reader, train_data_reader,
topology, cost,
parameters, parameters,
num_passes=1, num_passes=1,
test_data_reader=None, test_data_reader=None,
event_handler=None, event_handler=None,
batch_size=32, batch_size=32,
data_types=None,
reader_dict=None): reader_dict=None):
""" """
Training method. Will train num_passes of input data. Training method. Will train num_passes of input data.
:param train_data_reader: :param train_data_reader:
:param topology: Network Topology, use one or more Layers to represent it. :param cost: cost layers, to be optimized.
:param parameters: The parameter pools. :param parameters: The parameter pools.
:param num_passes: The total train passes. :param num_passes: The total train passes.
:param test_data_reader: :param test_data_reader:
...@@ -83,18 +81,18 @@ class SGD(ITrainer): ...@@ -83,18 +81,18 @@ class SGD(ITrainer):
occurred. occurred.
:type event_handler: (BaseEvent) => None :type event_handler: (BaseEvent) => None
:param batch_size: Not important, will be removed after data refactor. :param batch_size: Not important, will be removed after data refactor.
:param data_types: Not important, will be removed after data refactor.
:return: :return:
""" """
if event_handler is None: if event_handler is None:
event_handler = default_event_handler event_handler = default_event_handler
topology = v2_layer.parse_network(topology) topology = Topology(cost)
__check_train_args__(**locals()) __check_train_args__(**locals())
gm = api.GradientMachine.createFromConfigProto( gm = api.GradientMachine.createFromConfigProto(
topology, api.CREATE_MODE_NORMAL, self.__optimizer__.enable_types()) topology.proto(), api.CREATE_MODE_NORMAL,
self.__optimizer__.enable_types())
assert isinstance(gm, api.GradientMachine) assert isinstance(gm, api.GradientMachine)
parameters.append_gradient_machine(gm) parameters.append_gradient_machine(gm)
gm.randParameters() gm.randParameters()
...@@ -108,7 +106,7 @@ class SGD(ITrainer): ...@@ -108,7 +106,7 @@ class SGD(ITrainer):
assert isinstance(pass_evaluator, api.Evaluator) assert isinstance(pass_evaluator, api.Evaluator)
out_args = api.Arguments.createArguments(0) out_args = api.Arguments.createArguments(0)
feeder = DataFeeder(data_types, reader_dict) feeder = DataFeeder(topology.data_type(), reader_dict)
for pass_id in xrange(num_passes): for pass_id in xrange(num_passes):
event_handler(v2_event.BeginPass(pass_id)) event_handler(v2_event.BeginPass(pass_id))
...@@ -154,7 +152,7 @@ def __data_reader_to_batch__(reader, batch_size, topology): ...@@ -154,7 +152,7 @@ def __data_reader_to_batch__(reader, batch_size, topology):
def input_reorder(func): def input_reorder(func):
for item in func(): for item in func():
retv = [] retv = []
for __layer_name__ in topology.input_layer_names: for __layer_name__ in topology.proto().input_layer_names:
retv.append(item[__layer_name__]) retv.append(item[__layer_name__])
yield retv yield retv
...@@ -191,7 +189,7 @@ def __check_train_args__(train_data_reader, topology, parameters, ...@@ -191,7 +189,7 @@ def __check_train_args__(train_data_reader, topology, parameters,
raise ValueError('test_data_reader should be a function, which can ' raise ValueError('test_data_reader should be a function, which can '
'return a iterator') 'return a iterator')
if not isinstance(topology, ModelConfig): if not isinstance(topology, Topology):
raise ValueError('topology should be a model config') raise ValueError('topology should be a model config')
if not isinstance(parameters, v2_parameters.Parameters): if not isinstance(parameters, v2_parameters.Parameters):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册