diff --git a/demo/mnist/api_train.py b/demo/mnist/api_train.py
index 8573d8143a085b8d2e0bcf7df17b1abe177029df..fe39f0bd23f78e1a9d61f708dc880d9853b7a5f9 100644
--- a/demo/mnist/api_train.py
+++ b/demo/mnist/api_train.py
@@ -6,25 +6,16 @@ passed to C++ side of Paddle.
 
 The user api could be simpler and carefully designed.
 """
-import py_paddle.swig_paddle as api
-from py_paddle import DataProviderConverter
-import paddle.trainer.PyDataProvider2 as dp
-import numpy as np
 import random
-from mnist_util import read_from_mnist
-from paddle.trainer_config_helpers import *
-import paddle.v2
+import numpy as np
+import paddle.trainer.PyDataProvider2 as dp
+import paddle.v2 as paddle_v2
+import py_paddle.swig_paddle as api
+from paddle.trainer_config_helpers import *
+from py_paddle import DataProviderConverter
 
-
-def network_config():
-    imgs = data_layer(name='pixel', size=784)
-    hidden1 = fc_layer(input=imgs, size=200)
-    hidden2 = fc_layer(input=hidden1, size=200)
-    inference = fc_layer(input=hidden2, size=10, act=SoftmaxActivation())
-    cost = classification_cost(
-        input=inference, label=data_layer(
-            name='label', size=10))
-    outputs(cost)
+
+from mnist_util import read_from_mnist
 
 
 def init_parameter(network):
@@ -67,7 +58,7 @@ def input_order_converter(generator):
 
 def main():
     api.initPaddle("-use_gpu=false", "-trainer_count=4")  # use 4 cpu cores
-    optimizer = paddle.v2.optimizer.Adam(
+    optimizer = paddle_v2.optimizer.Adam(
         learning_rate=1e-4,
         batch_size=1000,
         model_average=ModelAverage(average_window=0.5),
@@ -79,8 +70,18 @@ def main():
     updater = optimizer.create_local_updater()
     assert isinstance(updater, api.ParameterUpdater)
 
+    # define network
+    images = paddle_v2.layer.data(name='pixel', size=784)
+    label = paddle_v2.layer.data(name='label', size=10)
+    hidden1 = paddle_v2.layer.fc(input=images, size=200)
+    hidden2 = paddle_v2.layer.fc(input=hidden1, size=200)
+    inference = paddle_v2.layer.fc(input=hidden2,
+                                   size=10,
+                                   act=paddle_v2.activation.Softmax())
+    cost = paddle_v2.layer.classification_cost(input=inference, label=label)
+
     # Create Simple Gradient Machine.
-    model_config = parse_network_config(network_config)
+    model_config = paddle_v2.layer.parse_network(cost)
     m = api.GradientMachine.createFromConfigProto(model_config,
                                                   api.CREATE_MODE_NORMAL,
                                                   optimizer.enable_types())
diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py
index 5e46d510ad35bb25a36e74a892fc211b1844ad03..59486ed1b3ba494a20b06b7ef7027fc3e86c043c 100644
--- a/demo/mnist/api_train_v2.py
+++ b/demo/mnist/api_train_v2.py
@@ -1,7 +1,7 @@
-from paddle.trainer_config_helpers import *
-from paddle.trainer.PyDataProvider2 import dense_vector, integer_value
-import paddle.v2 as paddle
 import numpy
+import paddle.v2 as paddle
+from paddle.trainer.PyDataProvider2 import dense_vector, integer_value
+
 import mnist_util
 
 
@@ -12,32 +12,31 @@ def train_reader():
         yield item
 
 
-def network_config():
-    imgs = data_layer(name='pixel', size=784)
-    hidden1 = fc_layer(input=imgs, size=200)
-    hidden2 = fc_layer(input=hidden1, size=200)
-    inference = fc_layer(input=hidden2, size=10, act=SoftmaxActivation())
-    cost = classification_cost(
-        input=inference, label=data_layer(
-            name='label', size=10))
-    outputs(cost)
-
-
 def main():
     paddle.init(use_gpu=False, trainer_count=1)
-    topology = parse_network_config(network_config)
+
+    # define network topology
+    images = paddle.layer.data(name='pixel', size=784)
+    label = paddle.layer.data(name='label', size=10)
+    hidden1 = paddle.layer.fc(input=images, size=200)
+    hidden2 = paddle.layer.fc(input=hidden1, size=200)
+    inference = paddle.layer.fc(input=hidden2,
+                                size=10,
+                                act=paddle.activation.Softmax())
+    cost = paddle.layer.classification_cost(input=inference, label=label)
+
+    topology = paddle.layer.parse_network(cost)
     parameters = paddle.parameters.create(topology)
     for param_name in parameters.keys():
         array = parameters.get(param_name)
         array[:] = numpy.random.uniform(low=-1.0, high=1.0, size=array.shape)
         parameters.set(parameter_name=param_name, value=array)
 
-    adam_optimizer = paddle.optimizer.Optimizer(
-        learning_rate=0.01, learning_method=AdamOptimizer())
+    adam_optimizer = paddle.optimizer.Adam(learning_rate=0.01)
 
     def event_handler(event):
         if isinstance(event, paddle.event.EndIteration):
-            para = parameters.get('___fc_layer_2__.w0')
+            para = parameters.get('___fc_2__.w0')
             print "Pass %d, Batch %d, Cost %f, Weight Mean Of Fc 2 is %f" % (
                 event.pass_id, event.batch_id, event.cost, para.mean())
 
diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py
index 72f1168e94f2a7de627551486b7dd6a5bc92940c..bc064a21ae150256752156f7ace56438321d5ba7 100644
--- a/python/paddle/v2/__init__.py
+++ b/python/paddle/v2/__init__.py
@@ -12,12 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import optimizer
+import layer
+import activation
 import parameters
-import py_paddle.swig_paddle as api
 import trainer
 import event
+import py_paddle.swig_paddle as api
 
-__all__ = ['optimizer', 'parameters', 'init', 'trainer', 'event']
+__all__ = [
+    'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer', 'event'
+]
 
 
 def init(**kwargs):
diff --git a/python/paddle/v2/activation.py b/python/paddle/v2/activation.py
new file mode 100644
index 0000000000000000000000000000000000000000..1f3aab9ef3c5f69e22d7e83250d0ff46c1ff718a
--- /dev/null
+++ b/python/paddle/v2/activation.py
@@ -0,0 +1,37 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers.activations import *
+
+__all__ = [
+    "Base", "Tanh", "Sigmoid", "Softmax", "Identity", "Linear",
+    "SequenceSoftmax", "Exp", "Relu", "BRelu", "SoftRelu", "STanh", "Abs",
+    "Square", "Log"
+]
+
+Base = BaseActivation
+Tanh = TanhActivation
+Sigmoid = SigmoidActivation
+Softmax = SoftmaxActivation
+SequenceSoftmax = SequenceSoftmaxActivation
+Identity = IdentityActivation
+Linear = Identity
+Relu = ReluActivation
+BRelu = BReluActivation
+SoftRelu = SoftReluActivation
+STanh = STanhActivation
+Abs = AbsActivation
+Square = SquareActivation
+Exp = ExpActivation
+Log = LogActivation
diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ce4ecd569aa1dd9ad27c65775d235b969a52905
--- /dev/null
+++ b/python/paddle/v2/layer.py
@@ -0,0 +1,186 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Before this new package, paddle.v2.layer, was introduced, users had to use
+functions in paddle.trainer_config_helpers.layers to configure networks.
+
+The Old Way:
+=========
+The old way requires that the creation of a network be defined in a Python
+function, say network_config, and that this Python function be passed to
+paddle.trainer_config_helpers.parse_network_config for the creation of the
+protobuf message description of this network.
+
+```python
+def network_config():
+    img = paddle.trainer_config_helpers.data_layer(name="pixel", size=784)
+    inference = paddle.trainer_config_helpers.fc_layer(
+        input=img,
+        size=10,
+        act=paddle.trainer_config_helpers.SoftmaxActivation())
+    cost = paddle.trainer_config_helpers.classification_cost(
+        input=inference,
+        label=paddle.trainer_config_helpers.data_layer(name="label", size=10))
+
+proto_desc = parse_network_config(network_config)
+```
+
+When parse_network_config executes network_config, layer definition functions
+like data_layer and fc_layer change some Python global variables, so that
+after the execution, parse_network_config can collect information from these
+global variables and generate the protobuf message.
+
+
+The New Way:
+=========
+In this PR, we define a function in paddle.v2.layer which creates a Python
+class for each layer creation function in paddle.trainer_config_helpers.layers.
+Users can use these classes to create a network as follows:
+
+```python
+img = paddle.v2.layer.data(name="pixel", size=784)
+inference = paddle.v2.layer.fc(
+    input=img, size=10, act=paddle.v2.activation.Softmax())
+cost = paddle.v2.layer.classification_cost(
+    input=inference,
+    label=paddle.v2.layer.data(name="label", size=10))
+
+parameters = paddle.v2.parameters.create(cost)
+```
+
+This new way doesn't require the invocations of layer definition functions
+to be inside a Python function; they can appear anywhere.
+
+Also, the creation of a protobuf message is hidden in the invocation of
+paddle.v2.parameters.create, no longer exposed to users.
+"""
+
+import paddle.trainer_config_helpers as conf_helps
+from paddle.trainer_config_helpers.config_parser_utils import \
+    parse_network_config as __parse__
+from paddle.trainer_config_helpers.default_decorators import wrap_name_default
+import collections
+
+__all__ = [
+    'parse_network', 'data', 'fc', 'max_id', 'classification_cost',
+    'cross_entropy_cost'
+]
+
+
+def parse_network(*outputs):
+    """
+    Parse all output layers and then generate a model config proto.
+    :param outputs: the output layers of the network.
+    :return: a ModelConfig protobuf message.
+    """
+
+    def __real_func__():
+        context = dict()
+        real_output = [each.to_proto(context=context) for each in outputs]
+        conf_helps.outputs(real_output)
+
+    return __parse__(__real_func__)
+
+
+class Layer(object):
+    def __init__(self, name, parent_layers):
+        assert isinstance(parent_layers, dict)
+        assert isinstance(name, basestring)
+        self.name = name
+        self.__parent_layers__ = parent_layers
+
+    def to_proto(self, context):
+        """
+        Convert this layer and, recursively, its parents into v1 layers,
+        memoizing the results in `context` so that a layer shared by
+        several outputs is converted only once.
+        """
+        kwargs = dict()
+        for layer_name in self.__parent_layers__:
+            if not isinstance(self.__parent_layers__[layer_name],
+                              collections.Sequence):
+                v1_layer = self.__parent_layers__[layer_name].to_proto(
+                    context=context)
+            else:
+                v1_layer = map(lambda x: x.to_proto(context=context),
+                               self.__parent_layers__[layer_name])
+            kwargs[layer_name] = v1_layer
+
+        if self.name not in context:
+            context[self.name] = self.to_proto_impl(**kwargs)
+        return context[self.name]
+
+    def to_proto_impl(self, **kwargs):
+        raise NotImplementedError()
+
+
+def __convert_to_v2__(method_name, name_prefix, parent_names):
+    if name_prefix is not None:
+        wrapper = wrap_name_default(name_prefix=name_prefix)
+    else:
+        wrapper = None
+
+    class V2LayerImpl(Layer):
+        def __init__(self, name=None, **kwargs):
+            parent_layers = dict()
+            other_kwargs = dict()
+            for pname in parent_names:
+                parent_layers[pname] = kwargs[pname]
+
+            for key in kwargs.keys():
+                if key not in parent_names:
+                    other_kwargs[key] = kwargs[key]
+
+            super(V2LayerImpl, self).__init__(name, parent_layers)
+            self.__other_kwargs__ = other_kwargs
+
+        if wrapper is not None:
+            __init__ = wrapper(__init__)
+
+        def to_proto_impl(self, **kwargs):
+            # Merge converted parent layers with the remaining keyword
+            # arguments and call the wrapped v1 layer function.
+            args = dict()
+            for each in kwargs:
+                args[each] = kwargs[each]
+            for each in self.__other_kwargs__:
+                args[each] = self.__other_kwargs__[each]
+            return getattr(conf_helps, method_name)(name=self.name, **args)
+
+    return V2LayerImpl
+
+
+data = __convert_to_v2__('data_layer', None, [])
+fc = __convert_to_v2__('fc_layer', name_prefix='fc', parent_names=['input'])
+max_id = __convert_to_v2__(
+    'maxid_layer', name_prefix='maxid_layer', parent_names=['input'])
+classification_cost = __convert_to_v2__(
+    'classification_cost',
+    name_prefix='classification_cost',
+    parent_names=['input', 'label'])
+cross_entropy_cost = __convert_to_v2__(
+    'cross_entropy',
+    name_prefix='cross_entropy',
+    parent_names=['input', 'label'])
+
+if __name__ == '__main__':
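+    # Smoke test: build a small network with the v2 wrappers and print the
+    # protobuf config generated for each combination of output layers.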
+    pixel = data(name='pixel', size=784)
+    label = data(name='label', size=10)
+    hidden = fc(input=pixel, size=100, act=conf_helps.SigmoidActivation())
+    inference = fc(input=hidden, size=10, act=conf_helps.SoftmaxActivation())
+    maxid = max_id(input=inference)
+    cost1 = classification_cost(input=inference, label=label)
+    cost2 = cross_entropy_cost(input=inference, label=label)
+
+    print parse_network(cost1)
+    print parse_network(cost2)
+    print parse_network(cost1, cost2)
+    print parse_network(cost2)
+    print parse_network(inference, maxid)
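A note for reviewers on the `to_proto`/`context` mechanism in layer.py: it is a memoized, bottom-up walk of the layer graph. The sketch below is a minimal, Paddle-free illustration of the same technique (the names `Node` and `build` are made up for this example and are not part of the PR): every node converts its parents before itself, and the shared `context` dict guarantees that a node reachable from several outputs is converted exactly once.

```python
import collections


class Node(object):
    """Stands in for paddle.v2.layer.Layer; it only records its parents."""

    def __init__(self, name, parents):
        self.name = name
        self.parents = parents  # dict: keyword name -> Node or list of Nodes

    def build(self, context):
        # Memoization: reuse the result if this node was already visited
        # through another output.
        if self.name in context:
            return context[self.name]
        kwargs = {}
        for key, parent in self.parents.items():
            if isinstance(parent, collections.Sequence):
                kwargs[key] = [p.build(context) for p in parent]
            else:
                kwargs[key] = parent.build(context)
        # A real implementation would call the wrapped v1 function here
        # (cf. to_proto_impl); this sketch just records the call as a string.
        context[self.name] = '%s(%s)' % (self.name, ', '.join(
            sorted('%s=%s' % kv for kv in kwargs.items())))
        return context[self.name]


a = Node('a', {})
b = Node('b', {'input': a})
c = Node('c', {'input': a})  # b and c share the same parent
d = Node('d', {'input': [b, c]})
print d.build({})  # a is converted once, although it is reachable twice
```

This is also why `parse_network(cost1, cost2)` in the `__main__` block above emits the layers shared by both costs only once: both traversals go through the same `context` created in `__real_func__`.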