diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py index d548d1adaafacdb097dbe476fdc76651c9f46b6b..8ab8cd2f85d5d7bcf86c2f57b350dfcd99177b69 100644 --- a/python/paddle/v2/__init__.py +++ b/python/paddle/v2/__init__.py @@ -20,6 +20,7 @@ import event import data_type import topology import data_feeder +import networks from . import dataset from . import reader import attr diff --git a/python/paddle/v2/config_base.py b/python/paddle/v2/config_base.py index 035f96b0f2e978a413a1ebe0ec115f75ff07befc..fa2ccec6c3270541dd6b13fdfd2323d10ceac642 100644 --- a/python/paddle/v2/config_base.py +++ b/python/paddle/v2/config_base.py @@ -22,6 +22,7 @@ class Layer(object): def __init__(self, name=None, parent_layers=None): assert isinstance(parent_layers, dict) self.name = name + self.__contex__ = {} self.__parent_layers__ = parent_layers def to_proto(self, context): @@ -39,16 +40,38 @@ class Layer(object): self.__parent_layers__[layer_name]) kwargs[layer_name] = v1_layer - if self.name is None: + if self.context_name() is None: return self.to_proto_impl(**kwargs) - elif self.name not in context: - context[self.name] = self.to_proto_impl(**kwargs) - - return context[self.name] + elif self.context_name() not in context: + context[self.context_name()] = self.to_proto_impl(**kwargs) + self.__contex__ = context + if self.use_context_name(): + return context[self.context_name()] + else: + return context[self.name] def to_proto_impl(self, **kwargs): raise NotImplementedError() + def context_name(self): + """ + Context name means the context which stores `to_proto_impl` result. + If multiple layer share same context_name, the `to_proto_impl` of them + will be invoked only once. + """ + return self.name + + def use_context_name(self): + return False + + def calculate_size(self): + """ + lazy calculate size of the layer, should be called when to_proto_impl of + this layer is called. + :return: + """ + return self.__contex__[self.context_name()].size + def __convert_to_v2__(method_name, parent_names, is_default_name=True): if is_default_name: diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py index f96e1f742a8bdd90517dc5c537666e32849ac073..a0e194c650bebf70b956cfff6be5c75fe8d9f83b 100644 --- a/python/paddle/v2/dataset/__init__.py +++ b/python/paddle/v2/dataset/__init__.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import mnist import imikolov import imdb diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/v2/dataset/cifar.py index 77c54bd268b5d988b0802a3edca91605e56f730e..5c6f5d85567fa19f2835ee4f3951531b6dfd3209 100644 --- a/python/paddle/v2/dataset/cifar.py +++ b/python/paddle/v2/dataset/cifar.py @@ -1,6 +1,20 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html """ + import cPickle import itertools import numpy diff --git a/python/paddle/v2/dataset/common.py b/python/paddle/v2/dataset/common.py index fcf4437ffaf329f52cc5bc997eff45dee200873c..397c9e66d495431f412c22b9b1d19ee32257b2dd 100644 --- a/python/paddle/v2/dataset/common.py +++ b/python/paddle/v2/dataset/common.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import requests import hashlib import os diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/v2/dataset/conll05.py new file mode 100644 index 0000000000000000000000000000000000000000..7874161a05968921a8e6789a7ff6a229d6cc6c01 --- /dev/null +++ b/python/paddle/v2/dataset/conll05.py @@ -0,0 +1,205 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.v2.dataset.common +import tarfile +import gzip +import itertools + +__all__ = ['test, get_dict', 'get_embedding'] +""" +Conll 2005 dataset. Paddle semantic role labeling Book and demo use this +dataset as an example. Because Conll 2005 is not free in public, the default +downloaded URL is test set of Conll 2005 (which is public). Users can change +URL and MD5 to their Conll dataset. +""" + +DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz' +DATA_MD5 = '387719152ae52d60422c016e92a742fc' +WORDDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt' +WORDDICT_MD5 = 'ea7fb7d4c75cc6254716f0177a506baa' +VERBDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/verbDict.txt' +VERBDICT_MD5 = '0d2977293bbb6cbefab5b0f97db1e77c' +TRGDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/targetDict.txt' +TRGDICT_MD5 = 'd8c7f03ceb5fc2e5a0fa7503a4353751' +EMB_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/emb' +EMB_MD5 = 'bf436eb0faa1f6f9103017f8be57cdb7' + +UNK_IDX = 0 + + +def load_dict(filename): + d = dict() + with open(filename, 'r') as f: + for i, line in enumerate(f): + d[line.strip()] = i + return d + + +def corpus_reader(data_path, words_name, props_name): + """ + Read one corpus. It returns an iterator. Each element of + this iterator is a tuple including sentence and labels. The sentence is + consist of a list of word IDs. The labels include a list of label IDs. + :return: a iterator of data. + :rtype: iterator + """ + + def reader(): + tf = tarfile.open(data_path) + wf = tf.extractfile(words_name) + pf = tf.extractfile(props_name) + with gzip.GzipFile(fileobj=wf) as words_file, gzip.GzipFile( + fileobj=pf) as props_file: + sentences = [] + labels = [] + one_seg = [] + for word, label in itertools.izip(words_file, props_file): + word = word.strip() + label = label.strip().split() + + if len(label) == 0: # end of sentence + for i in xrange(len(one_seg[0])): + a_kind_lable = [x[i] for x in one_seg] + labels.append(a_kind_lable) + + if len(labels) >= 1: + verb_list = [] + for x in labels[0]: + if x != '-': + verb_list.append(x) + + for i, lbl in enumerate(labels[1:]): + cur_tag = 'O' + is_in_bracket = False + lbl_seq = [] + verb_word = '' + for l in lbl: + if l == '*' and is_in_bracket == False: + lbl_seq.append('O') + elif l == '*' and is_in_bracket == True: + lbl_seq.append('I-' + cur_tag) + elif l == '*)': + lbl_seq.append('I-' + cur_tag) + is_in_bracket = False + elif l.find('(') != -1 and l.find(')') != -1: + cur_tag = l[1:l.find('*')] + lbl_seq.append('B-' + cur_tag) + is_in_bracket = False + elif l.find('(') != -1 and l.find(')') == -1: + cur_tag = l[1:l.find('*')] + lbl_seq.append('B-' + cur_tag) + is_in_bracket = True + else: + raise RuntimeError('Unexpected label: %s' % + l) + + yield sentences, verb_list[i], lbl_seq + + sentences = [] + labels = [] + one_seg = [] + else: + sentences.append(word) + one_seg.append(label) + + pf.close() + wf.close() + tf.close() + + return reader + + +def reader_creator(corpus_reader, + word_dict=None, + predicate_dict=None, + label_dict=None): + def reader(): + for sentence, predicate, labels in corpus_reader(): + + sen_len = len(sentence) + + verb_index = labels.index('B-V') + mark = [0] * len(labels) + if verb_index > 0: + mark[verb_index - 1] = 1 + ctx_n1 = sentence[verb_index - 1] + else: + ctx_n1 = 'bos' + + if verb_index > 1: + mark[verb_index - 2] = 1 + ctx_n2 = sentence[verb_index - 2] + else: + ctx_n2 = 'bos' + + mark[verb_index] = 1 + ctx_0 = sentence[verb_index] + + if verb_index < len(labels) - 1: + mark[verb_index + 1] = 1 + ctx_p1 = sentence[verb_index + 1] + else: + ctx_p1 = 'eos' + + if verb_index < len(labels) - 2: + mark[verb_index + 2] = 1 + ctx_p2 = sentence[verb_index + 2] + else: + ctx_p2 = 'eos' + + word_idx = [word_dict.get(w, UNK_IDX) for w in sentence] + pred_idx = [predicate_dict.get(predicate)] * sen_len + + ctx_n2_idx = [word_dict.get(ctx_n2, UNK_IDX)] * sen_len + ctx_n1_idx = [word_dict.get(ctx_n1, UNK_IDX)] * sen_len + ctx_0_idx = [word_dict.get(ctx_0, UNK_IDX)] * sen_len + ctx_p1_idx = [word_dict.get(ctx_p1, UNK_IDX)] * sen_len + ctx_p2_idx = [word_dict.get(ctx_p2, UNK_IDX)] * sen_len + + label_idx = [label_dict.get(w) for w in labels] + + yield word_idx, pred_idx, ctx_n2_idx, ctx_n1_idx, \ + ctx_0_idx, ctx_p1_idx, ctx_p2_idx, mark, label_idx + + return reader() + + +def get_dict(): + word_dict = load_dict( + common.download(WORDDICT_URL, 'conll05st', WORDDICT_MD5)) + verb_dict = load_dict( + common.download(VERBDICT_URL, 'conll05st', VERBDICT_MD5)) + label_dict = load_dict( + common.download(TRGDICT_URL, 'conll05st', TRGDICT_MD5)) + return word_dict, verb_dict, label_dict + + +def get_embedding(): + return common.download(EMB_URL, 'conll05st', EMB_MD5) + + +def test(): + word_dict, verb_dict, label_dict = get_dict() + reader = corpus_reader( + common.download(DATA_URL, 'conll05st', DATA_MD5), + words_name='conll05st-release/test.wsj/words/test.wsj.words.gz', + props_name='conll05st-release/test.wsj/props/test.wsj.props.gz') + return reader_creator(reader, word_dict, verb_dict, label_dict) + + +if __name__ == '__main__': + print get_embedding() + for f in test(): + print f diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py index 433e37380f840f5b7ff619a5f64b99d2ad724b17..ffd7d89049358e979db762af07701e010f40dc6e 100644 --- a/python/paddle/v2/dataset/imdb.py +++ b/python/paddle/v2/dataset/imdb.py @@ -1,6 +1,3 @@ -# /usr/bin/env python -# -*- coding:utf-8 -*- - # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,6 +14,7 @@ """ IMDB dataset: http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz """ + import paddle.v2.dataset.common import tarfile import Queue diff --git a/python/paddle/v2/dataset/imikolov.py b/python/paddle/v2/dataset/imikolov.py index b3791ddad66e588356338150fccadbcc8fa113ca..285d3eaca8317c78dc84e99b4d524a0f4872c687 100644 --- a/python/paddle/v2/dataset/imikolov.py +++ b/python/paddle/v2/dataset/imikolov.py @@ -1,3 +1,16 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/ """ diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py index ebcdff78b317ceb4811048ac78982e072962fa9c..6a621a2aaad14bf9598b838ce7c2ebf297bb0d30 100644 --- a/python/paddle/v2/dataset/mnist.py +++ b/python/paddle/v2/dataset/mnist.py @@ -1,3 +1,16 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ MNIST dataset. """ diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/v2/dataset/movielens.py index dcffcff2f58c63d451761d37f14127d730faf621..c22bcfa38b5f501732768dd4f62d8e088d57a7ff 100644 --- a/python/paddle/v2/dataset/movielens.py +++ b/python/paddle/v2/dataset/movielens.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import zipfile from common import download import re diff --git a/python/paddle/v2/dataset/tests/cifar_test.py b/python/paddle/v2/dataset/tests/cifar_test.py index a2af45ecf508462fe4b596b5d8d6401c5b974eff..e0e18229da7818be5752ee592e094a00da286ad9 100644 --- a/python/paddle/v2/dataset/tests/cifar_test.py +++ b/python/paddle/v2/dataset/tests/cifar_test.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import paddle.v2.dataset.cifar import unittest diff --git a/python/paddle/v2/dataset/tests/common_test.py b/python/paddle/v2/dataset/tests/common_test.py index 7d8406171b8478e4a8331637c5e867c18d5eb3d8..5babcef0eb4345d243904877d323c37d4889a643 100644 --- a/python/paddle/v2/dataset/tests/common_test.py +++ b/python/paddle/v2/dataset/tests/common_test.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import paddle.v2.dataset.common import unittest import tempfile diff --git a/python/paddle/v2/dataset/tests/imdb_test.py b/python/paddle/v2/dataset/tests/imdb_test.py index e887af16634d2db04b8cf5fa0269a69991d8baac..c4d82f26895d77d05c6e936bd636b1239e1a0cd8 100644 --- a/python/paddle/v2/dataset/tests/imdb_test.py +++ b/python/paddle/v2/dataset/tests/imdb_test.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import paddle.v2.dataset.imdb import unittest import re diff --git a/python/paddle/v2/dataset/tests/mnist_test.py b/python/paddle/v2/dataset/tests/mnist_test.py index b4408cc2f590d4d8da4ce5e98213cf7b208cfc15..1d344cac3e7483a351033570fbec75a4d19f4a55 100644 --- a/python/paddle/v2/dataset/tests/mnist_test.py +++ b/python/paddle/v2/dataset/tests/mnist_test.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import paddle.v2.dataset.mnist import unittest diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 67111f1315fbb0f55c1db0f6fe89fc988c8d83f6..2f55611aaa1d3ae22f5d7f184b38e622271881ea 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -65,19 +65,24 @@ to be in a Python function but could be anywhere. Also, the creation of a protobuf message is hidden in the invocation of paddle.v2.parameters.create, no longer exposed to users. """ + +import collections +import inspect from config_base import Layer, __convert_to_v2__ import paddle.trainer_config_helpers as conf_helps from paddle.trainer_config_helpers.config_parser_utils import \ parse_network_config as __parse__ - -from paddle.trainer_config_helpers.default_decorators import wrap_name_default from paddle.trainer_config_helpers.default_decorators import wrap_act_default from paddle.trainer_config_helpers.default_decorators import \ wrap_bias_attr_default +from paddle.trainer_config_helpers.default_decorators import wrap_name_default from paddle.trainer_config_helpers.layers import layer_support +from paddle.trainer.config_parser import \ + RecurrentLayerGroupWithoutOutLinksBegin, RecurrentLayerGroupSetOutLink, \ + RecurrentLayerGroupEnd, model_type -import data_type import activation +import data_type __all__ = ['parse_network', 'data'] @@ -130,6 +135,137 @@ class DataLayerV2(Layer): return getattr(conf_helps, self.__method_name__)(name=self.name, **args) +class WithExtraParent(Layer): + def extra_parent(self): + return self.__extra_parent__ + + def __init__(self, name=None, parent_layers=None): + self.__extra_parent__ = [] + super(WithExtraParent, self).__init__( + name=name, parent_layers=parent_layers) + + def append_extra_parent(self, parent): + self.__extra_parent__.append(parent) + + def to_proto(self, context): + """ + function to set proto attribute + """ + kwargs = dict() + for p in self.__extra_parent__: + p.to_proto(context=context) + + for layer_name in self.__parent_layers__: + if not isinstance(self.__parent_layers__[layer_name], + collections.Sequence): + v1_layer = self.__parent_layers__[layer_name].to_proto( + context=context) + else: + v1_layer = map(lambda x: x.to_proto(context=context), + self.__parent_layers__[layer_name]) + kwargs[layer_name] = v1_layer + + if self.context_name() is None: + return self.to_proto_impl(context=context, **kwargs) + elif self.context_name() not in context: + context[self.context_name()] = self.to_proto_impl( + context=context, **kwargs) + + if self.use_context_name(): + return context[self.context_name()] + else: + return context[self.name] + + +class MemoryV2(WithExtraParent): + def __init__(self, name, **kwargs): + self.name = name + super(MemoryV2, self).__init__(name=name, parent_layers=dict()) + self.__kwargs__ = kwargs + self.__boot_layer_name__ = None + if 'boot_layer' in kwargs: + begin_of_current_rnn = [] + # TODO(yuyang18): Fix inspect, it could be wrong when user invoke a + # function inside step. + st = inspect.stack() + for i in xrange(len(st)): + locs = inspect.stack()[i][0].f_locals + keys = locs.keys() + for key in keys: + val = locs[key] + if isinstance(val, RecurrentLayerInput): + begin_of_current_rnn.append(val) + elif isinstance(val, collections.Sequence): + for v in val: + if isinstance(v, RecurrentLayerInput): + begin_of_current_rnn.append(v) + + if begin_of_current_rnn: + break + assert begin_of_current_rnn is not None + for extra in begin_of_current_rnn: + self.append_extra_parent(extra) + assert isinstance(extra, WithExtraParent) + extra.append_extra_parent(kwargs['boot_layer']) + self.__boot_layer_name__ = kwargs['boot_layer'].name + + def to_proto_impl(self, context, **kwargs): + args = dict() + for each in kwargs: + args[each] = kwargs[each] + for each in self.__kwargs__: + args[each] = self.__kwargs__[each] + + if self.__boot_layer_name__ is not None: + args['boot_layer'] = context[self.__boot_layer_name__] + + size = args.get('size', None) + if size is not None: + if callable(size): + real_size = size() + else: + real_size = size + args['size'] = real_size + return conf_helps.memory(name=self.name, **args) + + def context_name(self): + return self.name + "#memory" + + def use_context_name(self): + """ + memory layer will have the same name with some layer + :return: + """ + return True + + +class LayerOutputV2(Layer): + """ + LayerOutputV2 is used to store the result of LayerOutput in v1 api. + It will not store it's parents because layer_output has been parsed already. + """ + + def __init__(self, layer_output): + assert isinstance(layer_output, conf_helps.LayerOutput) + self.layer_output = layer_output + super(LayerOutputV2, self).__init__( + name=layer_output.name, parent_layers=dict()) + + def to_proto_impl(self): + return self.layer_output + + +class StaticInputV2(object): + def __init__(self, input, is_seq=False, size=None): + assert isinstance(input, LayerV2) + self.name = input.name + self.input = input + self.is_seq = is_seq + self.size = size + # TODO(qiaolongfei): add size + # assert input.size is not None or size is not None + + class MixedLayerV2(Layer): """ This class is use to support `with` grammar. If not, the following code @@ -161,7 +297,6 @@ class MixedLayerV2(Layer): other_kwargs['act'] = act other_kwargs['bias_attr'] = bias_attr other_kwargs['layer_attr'] = layer_attr - parent_layers = {"input": self.__inputs__} super(MixedLayerV2, self).__init__(name, parent_layers) self.__other_kwargs__ = other_kwargs @@ -171,7 +306,7 @@ class MixedLayerV2(Layer): self.__inputs__.append(other) return self else: - raise MixedLayerTypeV2.AddToSealedMixedLayerExceptionV2() + raise MixedLayerV2.AddToSealedMixedLayerExceptionV2() def __enter__(self): assert len(self.__inputs__) == 0 @@ -186,6 +321,13 @@ class MixedLayerV2(Layer): args[each] = kwargs[each] for each in self.__other_kwargs__: args[each] = self.__other_kwargs__[each] + size = args.get('size', None) + if size is not None: + if callable(size): + real_size = size() + else: + real_size = size + args['size'] = real_size return getattr(conf_helps, self.__method_name__)(**args) @@ -202,14 +344,51 @@ def mixed(size=0, return MixedLayerV2(size, input, name, act, bias_attr, layer_attr) +class RecurrentLayerInput(WithExtraParent): + def __init__(self, recurrent_name, index, parent_layers): + assert len(parent_layers) == 1 + self.__parents__ = parent_layers.values()[0] + super(RecurrentLayerInput, self).__init__( + name=self.__parents__[index].name, parent_layers=parent_layers) + self.__recurrent_name__ = recurrent_name + + def context_name(self): + return self.__recurrent_name__ + ".begin" + + def to_proto_impl(self, context, **kwargs): + model_type('recurrent_nn') + RecurrentLayerGroupWithoutOutLinksBegin( + name=self.__recurrent_name__, + in_links=map(lambda x: x.name, self.__parents__)) + return self + + +class RecurrentLayerOutput(Layer): + def __init__(self, recurrent_name, index, parent_layers): + assert len(parent_layers) == 1 + self.__parents__ = parent_layers.values()[0] + super(RecurrentLayerOutput, self).__init__( + name=self.__parents__[index].name, parent_layers=parent_layers) + self.__recurrent_name__ = recurrent_name + + def context_name(self): + return self.__recurrent_name__ + ".end" + + def to_proto_impl(self, **kwargs): + for l in self.__parents__: + RecurrentLayerGroupSetOutLink(l.name) + RecurrentLayerGroupEnd(name=self.__recurrent_name__) + + LayerV2 = Layer data = DataLayerV2 AggregateLevel = conf_helps.layers.AggregateLevel ExpandLevel = conf_helps.layers.ExpandLevel +memory = MemoryV2 def __layer_name_mapping__(inname): - if inname in ['data_layer', 'memory', 'mixed_layer']: + if inname in ['data_layer', 'memory', 'mixed_layer', 'recurrent_group']: # Do Not handle these layers return elif inname == 'maxid_layer': @@ -231,8 +410,10 @@ def __layer_name_mapping__(inname): def __layer_name_mapping_parent_names__(inname): all_args = getattr(conf_helps, inname).argspec.args return filter( - lambda x: x in ['input1', 'input2','label', 'input', 'a', 'b', 'expand_as', - 'weights', 'vectors', 'weight', 'score', 'left', 'right'], + lambda x: x in ['input1', 'input2', 'label', 'input', 'a', 'b', + 'expand_as', + 'weights', 'vectors', 'weight', 'score', 'left', + 'right', 'output_mem'], all_args) @@ -267,3 +448,54 @@ operator_list = [ for op in operator_list: globals()[op[0]] = __convert_to_v2__( op[0], parent_names=op[1], is_default_name=False) + + +@wrap_name_default() +def recurrent_group(step, input, name=None): + if not isinstance(input, collections.Sequence): + input = [input] + + non_static_inputs = filter(lambda x: not isinstance(x, StaticInputV2), + input) + actual_input = [ + RecurrentLayerInput( + recurrent_name=name, + index=i, + parent_layers={'recurrent_inputs': non_static_inputs}) + for i in xrange(len(non_static_inputs)) + ] + + def __real_step__(*args): + rnn_input = list(args) + static_inputs = filter(lambda x: isinstance(x, StaticInputV2), input) + for static_input in static_inputs: + mem_name = "__%s_memory__" % static_input.input.name + mem = memory( + name=mem_name, + is_seq=static_input.is_seq, + size=static_input.input.calculate_size, + boot_layer=static_input.input) + with mixed( + name=mem_name, + size=static_input.input.calculate_size, + act=activation.Identity()) as mix: + mix += identity_projection(input=mem) + rnn_input.insert(input.index(static_input), mix) + return step(*rnn_input) + + actual_output = __real_step__(*actual_input) + + if not isinstance(actual_output, collections.Sequence): + actual_output = [actual_output] + + retv = [ + RecurrentLayerOutput( + recurrent_name=name, + index=i, + parent_layers={'recurrent_outputs': actual_output}) + for i in xrange(len(actual_output)) + ] + if len(retv) == 1: + return retv[0] + else: + return retv diff --git a/python/paddle/v2/tests/CMakeLists.txt b/python/paddle/v2/tests/CMakeLists.txt index 46b5d08b8761ea58530f5aefe5d1947408727f85..572deaff356712cac23cd7911cdf289db100564c 100644 --- a/python/paddle/v2/tests/CMakeLists.txt +++ b/python/paddle/v2/tests/CMakeLists.txt @@ -1,12 +1,16 @@ +add_test(NAME test_v2_api + COMMAND bash ${PROJ_ROOT}/python/paddle/v2/tests/run_tests.sh ${PYTHON_EXECUTABLE}) + add_test(NAME test_v2_layer COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_layer.py WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) -add_test(NAME test_v2_api - COMMAND bash ${PROJ_ROOT}/python/paddle/v2/tests/run_tests.sh ${PYTHON_EXECUTABLE}) +add_test(NAME test_v2_rnn_layer + COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_rnn_layer.py) -add_test(NAME topology_test +add_test(NAME test_topology COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_topology.py WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) diff --git a/python/paddle/v2/tests/test_rnn_layer.py b/python/paddle/v2/tests/test_rnn_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..5fbbd20eb76bb9daab2bcf98c4adad989106a377 --- /dev/null +++ b/python/paddle/v2/tests/test_rnn_layer.py @@ -0,0 +1,155 @@ +# Copyright PaddlePaddle contributors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import difflib +import unittest + +import paddle.trainer_config_helpers as conf_helps +import paddle.v2.activation as activation +import paddle.v2.data_type as data_type +import paddle.v2.layer as layer +from paddle.trainer_config_helpers.config_parser_utils import \ + parse_network_config as parse_network + + +class RNNTest(unittest.TestCase): + def test_simple_rnn(self): + dict_dim = 10 + word_dim = 8 + hidden_dim = 8 + + def parse_old_rnn(): + def step(y): + mem = conf_helps.memory(name="rnn_state", size=hidden_dim) + out = conf_helps.fc_layer( + input=[y, mem], + size=hidden_dim, + act=activation.Tanh(), + bias_attr=True, + name="rnn_state") + return out + + def test(): + data = conf_helps.data_layer(name="word", size=dict_dim) + embd = conf_helps.embedding_layer(input=data, size=word_dim) + conf_helps.recurrent_group(name="rnn", step=step, input=embd) + + return str(parse_network(test)) + + def parse_new_rnn(): + def new_step(y): + mem = layer.memory(name="rnn_state", size=hidden_dim) + out = layer.fc(input=[y, mem], + size=hidden_dim, + act=activation.Tanh(), + bias_attr=True, + name="rnn_state") + return out + + data = layer.data( + name="word", type=data_type.integer_value(dict_dim)) + embd = layer.embedding(input=data, size=word_dim) + rnn_layer = layer.recurrent_group( + name="rnn", step=new_step, input=embd) + return str(layer.parse_network(rnn_layer)) + + diff = difflib.unified_diff(parse_old_rnn().splitlines(1), + parse_new_rnn().splitlines(1)) + print ''.join(diff) + + def test_sequence_rnn_multi_input(self): + dict_dim = 10 + word_dim = 8 + hidden_dim = 8 + label_dim = 3 + + def parse_old_rnn(): + def test(): + data = conf_helps.data_layer(name="word", size=dict_dim) + label = conf_helps.data_layer(name="label", size=label_dim) + emb = conf_helps.embedding_layer(input=data, size=word_dim) + boot_layer = conf_helps.data_layer(name="boot", size=10) + boot_layer = conf_helps.fc_layer( + name='boot_fc', input=boot_layer, size=10) + + def step(y, wid): + z = conf_helps.embedding_layer(input=wid, size=word_dim) + mem = conf_helps.memory( + name="rnn_state", + size=hidden_dim, + boot_layer=boot_layer) + out = conf_helps.fc_layer( + input=[y, z, mem], + size=hidden_dim, + act=conf_helps.TanhActivation(), + bias_attr=True, + name="rnn_state") + return out + + out = conf_helps.recurrent_group( + name="rnn", step=step, input=[emb, data]) + + rep = conf_helps.last_seq(input=out) + prob = conf_helps.fc_layer( + size=label_dim, + input=rep, + act=conf_helps.SoftmaxActivation(), + bias_attr=True) + + conf_helps.outputs( + conf_helps.classification_cost( + input=prob, label=label)) + + return str(parse_network(test)) + + def parse_new_rnn(): + data = layer.data( + name="word", type=data_type.dense_vector(dict_dim)) + label = layer.data( + name="label", type=data_type.dense_vector(label_dim)) + emb = layer.embedding(input=data, size=word_dim) + boot_layer = layer.data( + name="boot", type=data_type.dense_vector(10)) + boot_layer = layer.fc(name='boot_fc', input=boot_layer, size=10) + + def step(y, wid): + z = layer.embedding(input=wid, size=word_dim) + mem = layer.memory( + name="rnn_state", size=hidden_dim, boot_layer=boot_layer) + out = layer.fc(input=[y, z, mem], + size=hidden_dim, + act=activation.Tanh(), + bias_attr=True, + name="rnn_state") + return out + + out = layer.recurrent_group( + name="rnn", step=step, input=[emb, data]) + + rep = layer.last_seq(input=out) + prob = layer.fc(size=label_dim, + input=rep, + act=activation.Softmax(), + bias_attr=True) + + cost = layer.classification_cost(input=prob, label=label) + + return str(layer.parse_network(cost)) + + diff = difflib.unified_diff(parse_old_rnn().splitlines(1), + parse_new_rnn().splitlines(1)) + print ''.join(diff) + + +if __name__ == '__main__': + unittest.main()