提交 6089b7c6 编写于 作者: J jacquesqiao 提交者: GitHub

Merge pull request #1278 from jacquesqiao/v2-network

add v2-layer
...@@ -6,25 +6,16 @@ passed to C++ side of Paddle. ...@@ -6,25 +6,16 @@ passed to C++ side of Paddle.
The user api could be simpler and carefully designed. The user api could be simpler and carefully designed.
""" """
import py_paddle.swig_paddle as api
from py_paddle import DataProviderConverter
import paddle.trainer.PyDataProvider2 as dp
import numpy as np
import random import random
from mnist_util import read_from_mnist
from paddle.trainer_config_helpers import *
import paddle.v2
import numpy as np
import paddle.trainer.PyDataProvider2 as dp
import paddle.v2 as paddle_v2
import py_paddle.swig_paddle as api
from paddle.trainer_config_helpers import *
from py_paddle import DataProviderConverter
def network_config(): from mnist_util import read_from_mnist
imgs = data_layer(name='pixel', size=784)
hidden1 = fc_layer(input=imgs, size=200)
hidden2 = fc_layer(input=hidden1, size=200)
inference = fc_layer(input=hidden2, size=10, act=SoftmaxActivation())
cost = classification_cost(
input=inference, label=data_layer(
name='label', size=10))
outputs(cost)
def init_parameter(network): def init_parameter(network):
...@@ -67,7 +58,7 @@ def input_order_converter(generator): ...@@ -67,7 +58,7 @@ def input_order_converter(generator):
def main(): def main():
api.initPaddle("-use_gpu=false", "-trainer_count=4") # use 4 cpu cores api.initPaddle("-use_gpu=false", "-trainer_count=4") # use 4 cpu cores
optimizer = paddle.v2.optimizer.Adam( optimizer = paddle_v2.optimizer.Adam(
learning_rate=1e-4, learning_rate=1e-4,
batch_size=1000, batch_size=1000,
model_average=ModelAverage(average_window=0.5), model_average=ModelAverage(average_window=0.5),
...@@ -79,8 +70,18 @@ def main(): ...@@ -79,8 +70,18 @@ def main():
updater = optimizer.create_local_updater() updater = optimizer.create_local_updater()
assert isinstance(updater, api.ParameterUpdater) assert isinstance(updater, api.ParameterUpdater)
# define network
images = paddle_v2.layer.data(name='pixel', size=784)
label = paddle_v2.layer.data(name='label', size=10)
hidden1 = paddle_v2.layer.fc(input=images, size=200)
hidden2 = paddle_v2.layer.fc(input=hidden1, size=200)
inference = paddle_v2.layer.fc(input=hidden2,
size=10,
act=paddle_v2.activation.Softmax())
cost = paddle_v2.layer.classification_cost(input=inference, label=label)
# Create Simple Gradient Machine. # Create Simple Gradient Machine.
model_config = parse_network_config(network_config) model_config = paddle_v2.layer.parse_network(cost)
m = api.GradientMachine.createFromConfigProto(model_config, m = api.GradientMachine.createFromConfigProto(model_config,
api.CREATE_MODE_NORMAL, api.CREATE_MODE_NORMAL,
optimizer.enable_types()) optimizer.enable_types())
......
from paddle.trainer_config_helpers import *
from paddle.trainer.PyDataProvider2 import dense_vector, integer_value
import paddle.v2 as paddle
import numpy import numpy
import paddle.v2 as paddle
from paddle.trainer.PyDataProvider2 import dense_vector, integer_value
import mnist_util import mnist_util
...@@ -12,32 +12,31 @@ def train_reader(): ...@@ -12,32 +12,31 @@ def train_reader():
yield item yield item
def network_config():
imgs = data_layer(name='pixel', size=784)
hidden1 = fc_layer(input=imgs, size=200)
hidden2 = fc_layer(input=hidden1, size=200)
inference = fc_layer(input=hidden2, size=10, act=SoftmaxActivation())
cost = classification_cost(
input=inference, label=data_layer(
name='label', size=10))
outputs(cost)
def main(): def main():
paddle.init(use_gpu=False, trainer_count=1) paddle.init(use_gpu=False, trainer_count=1)
topology = parse_network_config(network_config)
# define network topology
images = paddle.layer.data(name='pixel', size=784)
label = paddle.layer.data(name='label', size=10)
hidden1 = paddle.layer.fc(input=images, size=200)
hidden2 = paddle.layer.fc(input=hidden1, size=200)
inference = paddle.layer.fc(input=hidden2,
size=10,
act=paddle.activation.Softmax())
cost = paddle.layer.classification_cost(input=inference, label=label)
topology = paddle.layer.parse_network(cost)
parameters = paddle.parameters.create(topology) parameters = paddle.parameters.create(topology)
for param_name in parameters.keys(): for param_name in parameters.keys():
array = parameters.get(param_name) array = parameters.get(param_name)
array[:] = numpy.random.uniform(low=-1.0, high=1.0, size=array.shape) array[:] = numpy.random.uniform(low=-1.0, high=1.0, size=array.shape)
parameters.set(parameter_name=param_name, value=array) parameters.set(parameter_name=param_name, value=array)
adam_optimizer = paddle.optimizer.Optimizer( adam_optimizer = paddle.optimizer.Adam(learning_rate=0.01)
learning_rate=0.01, learning_method=AdamOptimizer())
def event_handler(event): def event_handler(event):
if isinstance(event, paddle.event.EndIteration): if isinstance(event, paddle.event.EndIteration):
para = parameters.get('___fc_layer_2__.w0') para = parameters.get('___fc_2__.w0')
print "Pass %d, Batch %d, Cost %f, Weight Mean Of Fc 2 is %f" % ( print "Pass %d, Batch %d, Cost %f, Weight Mean Of Fc 2 is %f" % (
event.pass_id, event.batch_id, event.cost, para.mean()) event.pass_id, event.batch_id, event.cost, para.mean())
......
...@@ -12,12 +12,16 @@ ...@@ -12,12 +12,16 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import optimizer import optimizer
import layer
import activation
import parameters import parameters
import py_paddle.swig_paddle as api
import trainer import trainer
import event import event
import py_paddle.swig_paddle as api
__all__ = ['optimizer', 'parameters', 'init', 'trainer', 'event'] __all__ = [
'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer', 'event'
]
def init(**kwargs): def init(**kwargs):
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers.activations import *
__all__ = [
"Base", "Tanh", "Sigmoid", "Softmax", "Identity", "Linear",
'SequenceSoftmax', "Exp", "Relu", "BRelu", "SoftRelu", "STanh", "Abs",
"Square", "Log"
]
Base = BaseActivation
Tanh = TanhActivation
Sigmoid = SigmoidActivation
Softmax = SoftmaxActivation
SequenceSoftmax = SequenceSoftmaxActivation
Identity = IdentityActivation
Linear = Identity
Relu = ReluActivation
BRelu = BReluActivation
SoftRelu = SoftReluActivation
STanh = STanhActivation
Abs = AbsActivation
Square = SquareActivation
Exp = ExpActivation
Log = LogActivation
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Before this new package paddle.v2.layer, users would need to use functions
in paddle.trainer_config_helpers.layers to configure networks.
The Old Way:
=========
This old way requires that the creation of a network be defined in a Python
function, say network_config, and that this Python function being passed to
paddle.trainer_config_helpers.parse_network_config for the creation of
protobuf message description of this network.
```python
def network_config():
img = paddle.trainer_config_helpers.data_layer(name="pixel", size=784)
inference = paddle.trainer_config_helpers.fc_layer(
input=img,
size=10,
act=paddle.trainer_config_helpers.SoftmaxActivation())
cost = paddle.trainer_config_helpers.classification_cost(
input=inference,
label=paddle.trainer_config_helpers.data_layer(name="label", size=10))
proto_desc = parse_network_config(network_config)
```
When parse_network_config executes network_config, those layer definition
functions like data_layer and fc_layer would change some Python global variables,
so that after the execution, parse_network_config could collect information from
these global variables and generates the protobuf message.
The New Way:
=========
In this PR, we define a function in paddle.v2.layer which creates a Python
class for each layer creation function in paddle.trainer_config_helpers.layers.
Users can use create a network as follows:
```python
img = paddle.v2.layer.data(name="pixel", size=784)
inference = paddle.v2.layer.fc(input=img, size=10, act=paddle.v2.layer.Softmax())
cost = paddle.v2.layer.classification(
input=inference,
label=paddle.v2.layer.data(name="label", size=10))
parameters = paddle.v2.parameters.create(cost)
```
This new way doesn't require those invocations to layer definition functions
to be in a Python function but could be anywhere.
Also, the creation of a protobuf message is hidden in the invocation of
paddle.v2.parameters.create, no longer exposed to users.
"""
import paddle.trainer_config_helpers as conf_helps
from paddle.trainer_config_helpers.config_parser_utils import \
parse_network_config as __parse__
from paddle.trainer_config_helpers.default_decorators import wrap_name_default
import collections
__all__ = [
'parse_network', 'data', 'fc', 'max_id', 'classification_cost',
'cross_entropy_cost'
]
def parse_network(*outputs):
"""
parse all output layers and then generate a model config proto.
:param outputs:
:return:
"""
def __real_func__():
context = dict()
real_output = [each.to_proto(context=context) for each in outputs]
conf_helps.outputs(real_output)
return __parse__(__real_func__)
class Layer(object):
def __init__(self, name, parent_layers):
assert isinstance(parent_layers, dict)
assert isinstance(name, basestring)
self.name = name
self.__parent_layers__ = parent_layers
def to_proto(self, context):
"""
function to set proto attribute
"""
kwargs = dict()
for layer_name in self.__parent_layers__:
if not isinstance(self.__parent_layers__[layer_name],
collections.Sequence):
v1_layer = self.__parent_layers__[layer_name].to_proto(
context=context)
else:
v1_layer = map(lambda x: x.to_proto(context=context),
self.__parent_layers__[layer_name])
kwargs[layer_name] = v1_layer
if self.name not in context:
context[self.name] = self.to_proto_impl(**kwargs)
return context[self.name]
def to_proto_impl(self, **kwargs):
raise NotImplementedError()
def __convert_to_v2__(method_name, name_prefix, parent_names):
if name_prefix is not None:
wrapper = wrap_name_default(name_prefix=name_prefix)
else:
wrapper = None
class V2LayerImpl(Layer):
def __init__(self, name=None, **kwargs):
parent_layers = dict()
other_kwargs = dict()
for pname in parent_names:
parent_layers[pname] = kwargs[pname]
for key in kwargs.keys():
if key not in parent_names:
other_kwargs[key] = kwargs[key]
super(V2LayerImpl, self).__init__(name, parent_layers)
self.__other_kwargs__ = other_kwargs
if wrapper is not None:
__init__ = wrapper(__init__)
def to_proto_impl(self, **kwargs):
args = dict()
for each in kwargs:
args[each] = kwargs[each]
for each in self.__other_kwargs__:
args[each] = self.__other_kwargs__[each]
return getattr(conf_helps, method_name)(name=self.name, **args)
return V2LayerImpl
data = __convert_to_v2__('data_layer', None, [])
fc = __convert_to_v2__('fc_layer', name_prefix='fc', parent_names=['input'])
max_id = __convert_to_v2__(
'maxid_layer', name_prefix='maxid_layer', parent_names=['input'])
classification_cost = __convert_to_v2__(
'classification_cost',
name_prefix='classification_cost',
parent_names=['input', 'label'])
cross_entropy_cost = __convert_to_v2__(
'cross_entropy',
name_prefix='cross_entropy',
parent_names=['input', 'label'])
if __name__ == '__main__':
pixel = data(name='pixel', size=784)
label = data(name='label', size=10)
hidden = fc(input=pixel, size=100, act=conf_helps.SigmoidActivation())
inference = fc(input=hidden, size=10, act=conf_helps.SoftmaxActivation())
maxid = max_id(input=inference)
cost1 = classification_cost(input=inference, label=label)
cost2 = cross_entropy_cost(input=inference, label=label)
print parse_network(cost1)
print parse_network(cost2)
print parse_network(cost1, cost2)
print parse_network(cost2)
print parse_network(inference, maxid)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册