提交 35ec5f0f 编写于 作者: Q qiaolongfei

Support StaticInput

上级 5fc572c2
...@@ -20,6 +20,7 @@ import event ...@@ -20,6 +20,7 @@ import event
import data_type import data_type
import topology import topology
import data_feeder import data_feeder
import networks
from . import dataset from . import dataset
from . import reader from . import reader
import attr import attr
...@@ -29,7 +30,7 @@ import py_paddle.swig_paddle as api ...@@ -29,7 +30,7 @@ import py_paddle.swig_paddle as api
__all__ = [ __all__ = [
'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer', 'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer',
'event', 'data_type', 'attr', 'pooling', 'data_feeder', 'dataset', 'reader', 'event', 'data_type', 'attr', 'pooling', 'data_feeder', 'dataset', 'reader',
'topology' 'topology', 'networks'
] ]
......
...@@ -109,9 +109,10 @@ def parse_network(*outputs): ...@@ -109,9 +109,10 @@ def parse_network(*outputs):
class Layer(object): class Layer(object):
def __init__(self, name=None, parent_layers=None): def __init__(self, name=None, size=None, parent_layers=None):
assert isinstance(parent_layers, dict) assert isinstance(parent_layers, dict)
self.name = name self.name = name
self.size = size
self.__parent_layers__ = parent_layers self.__parent_layers__ = parent_layers
def to_proto(self, context): def to_proto(self, context):
...@@ -173,7 +174,8 @@ def __convert_to_v2__(method_name, parent_names, is_default_name=True): ...@@ -173,7 +174,8 @@ def __convert_to_v2__(method_name, parent_names, is_default_name=True):
other_kwargs[key] = kwargs[key] other_kwargs[key] = kwargs[key]
name = kwargs.get('name', None) name = kwargs.get('name', None)
super(V2LayerImpl, self).__init__(name, parent_layers) size = kwargs.get('size', None)
super(V2LayerImpl, self).__init__(name, size, parent_layers)
self.__other_kwargs__ = other_kwargs self.__other_kwargs__ = other_kwargs
if wrapper is not None: if wrapper is not None:
...@@ -220,9 +222,10 @@ class WithExtraParent(Layer): ...@@ -220,9 +222,10 @@ class WithExtraParent(Layer):
def extra_parent(self): def extra_parent(self):
return self.__extra_parent__ return self.__extra_parent__
def __init__(self, name=None, parent_layers=None): def __init__(self, name=None, size=None, parent_layers=None):
self.__extra_parent__ = [] self.__extra_parent__ = []
super(WithExtraParent, self).__init__(name, parent_layers) super(WithExtraParent, self).__init__(
name=name, size=size, parent_layers=parent_layers)
def append_extra_parent(self, parent): def append_extra_parent(self, parent):
self.__extra_parent__.append(parent) self.__extra_parent__.append(parent)
...@@ -261,7 +264,8 @@ class MemoryV2(WithExtraParent): ...@@ -261,7 +264,8 @@ class MemoryV2(WithExtraParent):
def __init__(self, name, size, **kwargs): def __init__(self, name, size, **kwargs):
self.name = name self.name = name
self.size = size self.size = size
super(MemoryV2, self).__init__(name=name, parent_layers=dict()) super(MemoryV2, self).__init__(
name=name, size=size, parent_layers=dict())
self.__kwargs__ = kwargs self.__kwargs__ = kwargs
self.__boot_layer_name__ = None self.__boot_layer_name__ = None
if 'boot_layer' in kwargs: if 'boot_layer' in kwargs:
...@@ -271,7 +275,9 @@ class MemoryV2(WithExtraParent): ...@@ -271,7 +275,9 @@ class MemoryV2(WithExtraParent):
st = inspect.stack() st = inspect.stack()
for i in xrange(len(st)): for i in xrange(len(st)):
locs = inspect.stack()[i][0].f_locals locs = inspect.stack()[i][0].f_locals
for val in locs.viewvalues(): keys = locs.keys()
for key in keys:
val = locs[key]
if isinstance(val, RecurrentLayerInput): if isinstance(val, RecurrentLayerInput):
begin_of_current_rnn.append(val) begin_of_current_rnn.append(val)
...@@ -322,21 +328,15 @@ class LayerOutputV2(Layer): ...@@ -322,21 +328,15 @@ class LayerOutputV2(Layer):
return self.layer_output return self.layer_output
class StaticInputV2(Layer): class StaticInputV2(object):
def __init__(self, input=None, **kwargs): def __init__(self, input, is_seq=False, size=None):
assert input is not None assert isinstance(input, LayerV2)
self.__kwargs__ = kwargs self.name = input.name
super(StaticInputV2, self).__init__( self.input = input
name=input.name, parent_layers={'input': input}) self.is_seq = is_seq
self.size = size
def context_name(self): # TODO(qiaolongfei): add size
return self.name + "#static_input" # assert input.size is not None or size is not None
def to_proto_impl(self, **kwargs):
args = dict()
args.update(kwargs)
args.update(self.__kwargs__)
return conf_helps.StaticInput(**args)
class MixedLayerV2(Layer): class MixedLayerV2(Layer):
...@@ -370,9 +370,8 @@ class MixedLayerV2(Layer): ...@@ -370,9 +370,8 @@ class MixedLayerV2(Layer):
other_kwargs['act'] = act other_kwargs['act'] = act
other_kwargs['bias_attr'] = bias_attr other_kwargs['bias_attr'] = bias_attr
other_kwargs['layer_attr'] = layer_attr other_kwargs['layer_attr'] = layer_attr
parent_layers = {"input": self.__inputs__} parent_layers = {"input": self.__inputs__}
super(MixedLayerV2, self).__init__(name, parent_layers) super(MixedLayerV2, self).__init__(name, size, parent_layers)
self.__other_kwargs__ = other_kwargs self.__other_kwargs__ = other_kwargs
def __iadd__(self, other): def __iadd__(self, other):
...@@ -452,6 +451,12 @@ def recurrent_group(step, input, name=None): ...@@ -452,6 +451,12 @@ def recurrent_group(step, input, name=None):
if not isinstance(input, collections.Sequence): if not isinstance(input, collections.Sequence):
input = [input] input = [input]
# TODO(qiaolongfei) convert StaticInput to memory according to v2 recurrent_group
for i in xrange(len(input)):
cur_input = input[i]
if isinstance(cur_input, StaticInputV2):
input[i] = cur_input.input
actual_input = [ actual_input = [
RecurrentLayerInput( RecurrentLayerInput(
recurrent_name=name, recurrent_name=name,
...@@ -512,7 +517,7 @@ def __layer_name_mapping_parent_names__(inname): ...@@ -512,7 +517,7 @@ def __layer_name_mapping_parent_names__(inname):
lambda x: x in ['input1', 'input2', 'label', 'input', 'a', 'b', lambda x: x in ['input1', 'input2', 'label', 'input', 'a', 'b',
'expand_as', 'expand_as',
'weights', 'vectors', 'weight', 'score', 'left', 'weights', 'vectors', 'weight', 'score', 'left',
'right'], 'right', 'output_mem'],
all_args) all_args)
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Expose selected v1 "networks" config-helpers through the v2 API.
# __convert_to_v2__(method_name, parent_names) wraps a v1 helper into a
# v2 Layer class; the list names the keyword arguments that are treated
# as parent layers when the v2 layer graph is built.
from layer import __convert_to_v2__
# v2 wrapper for the v1 simple_gru network helper; 'input' is its parent layer.
simple_gru = __convert_to_v2__('simple_gru', ['input'])
# v2 wrapper for the v1 simple_attention helper; all three named kwargs are
# parent layers of the attention construct.
simple_attention = __convert_to_v2__(
    'simple_attention', ['encoded_sequence', 'encoded_proj', 'decoder_state'])
...@@ -74,21 +74,28 @@ class RNNTest(unittest.TestCase): ...@@ -74,21 +74,28 @@ class RNNTest(unittest.TestCase):
label_dim = 3 label_dim = 3
def parse_old_rnn(): def parse_old_rnn():
def step(y, wid):
    """One recurrent time step of the v1 (old-API) test network.

    y:   per-step slice of the embedded input sequence.
    wid: per-step word id, embedded again inside the step.
    Returns the new hidden state produced by the fc layer.
    """
    z = conf_helps.embedding_layer(input=wid, size=word_dim)
    # NOTE(review): memory(name="rnn_state") presumably reads the previous
    # step's output of the identically named fc layer below — confirm
    # against conf_helps.memory semantics.
    mem = conf_helps.memory(name="rnn_state", size=hidden_dim)
    # Combine current inputs with the previous state; naming the fc layer
    # "rnn_state" closes the recurrent loop with the memory above.
    out = conf_helps.fc_layer(
        input=[y, z, mem],
        size=hidden_dim,
        act=conf_helps.TanhActivation(),
        bias_attr=True,
        name="rnn_state")
    return out
def test(): def test():
data = conf_helps.data_layer(name="word", size=dict_dim) data = conf_helps.data_layer(name="word", size=dict_dim)
label = conf_helps.data_layer(name="label", size=label_dim) label = conf_helps.data_layer(name="label", size=label_dim)
emb = conf_helps.embedding_layer(input=data, size=word_dim) emb = conf_helps.embedding_layer(input=data, size=word_dim)
boot_layer = conf_helps.data_layer(name="boot", size=10)
boot_layer = conf_helps.fc_layer(
name='boot_fc', input=boot_layer, size=10)
def step(y, wid):
    """One recurrent time step of the v1 test network, with a boot layer.

    y:   per-step slice of the embedded input sequence.
    wid: per-step word id, embedded again inside the step.
    Returns the new hidden state produced by the fc layer.
    """
    z = conf_helps.embedding_layer(input=wid, size=word_dim)
    # NOTE(review): boot_layer is captured from the enclosing scope and
    # presumably supplies the memory's value at the first time step
    # (instead of a zero boot state) — confirm against conf_helps.memory.
    mem = conf_helps.memory(
        name="rnn_state",
        size=hidden_dim,
        boot_layer=boot_layer)
    # Naming the fc layer "rnn_state" links it back to the memory above,
    # closing the recurrent loop.
    out = conf_helps.fc_layer(
        input=[y, z, mem],
        size=hidden_dim,
        act=conf_helps.TanhActivation(),
        bias_attr=True,
        name="rnn_state")
    return out
out = conf_helps.recurrent_group( out = conf_helps.recurrent_group(
name="rnn", step=step, input=[emb, data]) name="rnn", step=step, input=[emb, data])
...@@ -111,11 +118,9 @@ class RNNTest(unittest.TestCase): ...@@ -111,11 +118,9 @@ class RNNTest(unittest.TestCase):
label = layer.data( label = layer.data(
name="label", type=data_type.dense_vector(label_dim)) name="label", type=data_type.dense_vector(label_dim))
emb = layer.embedding(input=data, size=word_dim) emb = layer.embedding(input=data, size=word_dim)
boot_layer = layer.data( boot_layer = layer.data(
name="boot", type=data_type.dense_vector(10)) name="boot", type=data_type.dense_vector(10))
boot_layer = layer.fc(name='boot_fc', input=boot_layer, size=10)
boot_layer = layer.fc(name='wtf', input=boot_layer, size=10)
def step(y, wid): def step(y, wid):
z = layer.embedding(input=wid, size=word_dim) z = layer.embedding(input=wid, size=word_dim)
...@@ -141,11 +146,9 @@ class RNNTest(unittest.TestCase): ...@@ -141,11 +146,9 @@ class RNNTest(unittest.TestCase):
return str(layer.parse_network(cost)) return str(layer.parse_network(cost))
with open("/Users/baidu/old.out", 'w') as f: diff = difflib.unified_diff(parse_old_rnn().splitlines(1),
print >> f, parse_old_rnn() parse_new_rnn().splitlines(1))
with open("/Users/baidu/new.out", "w") as f: print ''.join(diff)
print >> f, parse_new_rnn()
# print ''.join(diff)
if __name__ == '__main__': if __name__ == '__main__':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册