From 7cfe34da7c99c541189cb73165bc022bbc4289c0 Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Tue, 21 Feb 2017 23:53:42 +0800
Subject: [PATCH] modify api_train_v2

---
 demo/mnist/api_train_v2.py     | 20 ++++---
 python/paddle/v2/layer.py      |  8 ++-
 python/paddle/v2/parameters.py | 21 ++++----
 python/paddle/v2/topology.py   | 96 +++++++++++++++++++++++++++++++---
 python/paddle/v2/trainer.py    | 20 +++----
 5 files changed, 119 insertions(+), 46 deletions(-)

diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py
index 6fc01ce58b..f6edd1f34f 100644
--- a/demo/mnist/api_train_v2.py
+++ b/demo/mnist/api_train_v2.py
@@ -26,7 +26,9 @@ def main():
                                 act=paddle.activation.Softmax())
     cost = paddle.layer.classification_cost(input=inference, label=label)
 
-    parameters = paddle.parameters.create(cost)
+    topology = paddle.topology.Topology(cost)
+
+    parameters = paddle.parameters.create(topology)
     for param_name in parameters.keys():
         array = parameters.get(param_name)
         array[:] = numpy.random.uniform(low=-1.0, high=1.0, size=array.shape)
@@ -45,16 +47,12 @@ def main():
 
     trainer = paddle.trainer.SGD(update_equation=adam_optimizer)
 
-    trainer.train(train_data_reader=train_reader,
-                  topology=cost,
-                  parameters=parameters,
-                  event_handler=event_handler,
-                  batch_size=32,  # batch size should be refactor in Data reader
-                  data_types={  # data_types will be removed, It should be in
-                      # network topology
-                      'pixel': images.type,
-                      'label': label.type
-                  })
+    trainer.train(
+        train_data_reader=train_reader,
+        topology=topology,
+        parameters=parameters,
+        event_handler=event_handler,
+        batch_size=32)  # batch size should be refactored into the data reader
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py
index 4d052c983c..5f146c8c03 100644
--- a/python/paddle/v2/layer.py
+++ b/python/paddle/v2/layer.py
@@ -66,12 +66,14 @@ Also, the creation of a protobuf message is hidden in the invocation of
 paddle.v2.parameters.create, no longer exposed to users.
 """
 
+import collections
+
 import paddle.trainer_config_helpers as conf_helps
-from . import data_type as v2_data
 from paddle.trainer_config_helpers.config_parser_utils import \
     parse_network_config as __parse__
 from paddle.trainer_config_helpers.default_decorators import wrap_name_default
-import collections
+
+import data_type as v2_data
 
 __all__ = [
     'parse_network', 'data', 'fc', 'max_id', 'classification_cost',
@@ -184,6 +186,8 @@ class DataLayerV2(Layer):
         return getattr(conf_helps, self.__method_name__)(name=self.name, **args)
 
 
+LayerV2 = Layer
+
 data = DataLayerV2
 fc = __convert_to_v2__('fc_layer', name_prefix='fc', parent_names=['input'])
 max_id = __convert_to_v2__(
diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py
index ea504d5104..b569afe3a1 100644
--- a/python/paddle/v2/parameters.py
+++ b/python/paddle/v2/parameters.py
@@ -1,26 +1,23 @@
 import numpy as np
-from . import layer as v2_layer
 import py_paddle.swig_paddle as api
 from paddle.proto.ParameterConfig_pb2 import ParameterConfig
 
+import topology as v2_topology
+
 __all__ = ['Parameters', 'create']
 
 
-def create(*layers):
+def create(topology):
     """
-    Create parameter pool by layers. In paddle, layer can be represent a
-    model config.
-
-    :param layers:
+    Create a parameter pool from a topology.
+    :param topology:
     :return:
     """
-    for layer in layers:
-        if not isinstance(layer, v2_layer.Layer):
-            raise ValueError(
-                'create must pass a topologies which type is paddle.layer.Layer')
-    model_config = v2_layer.parse_network(*layers)
+    if not isinstance(topology, v2_topology.Topology):
+        raise ValueError(
+            'create must pass a topology whose type is topology.Topology')
     pool = Parameters()
-    for param in model_config.parameters:
+    for param in topology.proto().parameters:
         pool.__append_config__(param)
     return pool
 
diff --git a/python/paddle/v2/topology.py b/python/paddle/v2/topology.py
index ddba1b2345..6508b3ce88 100644
--- a/python/paddle/v2/topology.py
+++ b/python/paddle/v2/topology.py
@@ -12,7 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from . import layer
+from paddle.proto.ModelConfig_pb2 import ModelConfig
+import paddle.trainer_config_helpers as conf_helps
+import layer as v2_layer
+import data_type
 
 __all__ = ['Topology']
 
@@ -23,22 +26,101 @@ class Topology(object):
     """
     and network configs.
     """
-    def __init__(self, cost):
-        self.cost = cost
-        self.__model_config__ = layer.parse_network(cost)
+    def __init__(self, *layers):
+        for layer in layers:
+            if not isinstance(layer, v2_layer.LayerV2):
+                raise ValueError('Topology must be constructed from layers '
+                                 'whose type is paddle.layer.Layer')
+        self.layers = layers
+        self.__model_config__ = v2_layer.parse_network(*layers)
+        assert isinstance(self.__model_config__, ModelConfig)
 
-    def __call__(self):
+    def proto(self):
         return self.__model_config__
 
     def get_layer(self, name):
+        """
+        get the v2.Layer instance by layer name
+        :param name:
+        :return:
+        """
+        result_layer = []
+
+        def find_layer_by_name(layer, layer_name):
+            if layer.name == layer_name and len(result_layer) == 0:
+                result_layer.append(layer)
+            for parent_layer in layer.__parent_layers__.values():
+                find_layer_by_name(parent_layer, layer_name)
+
+        for layer in self.layers:
+            find_layer_by_name(layer, name)
+
+        return result_layer[0]
+
+    def get_data_layer(self):
+        """
+        get all data layers
+        :return:
+        """
+        data_layers = []
+
+        def find_data_layer(layer):
+            assert isinstance(layer, v2_layer.LayerV2)
+            if isinstance(layer, v2_layer.DataLayerV2):
+                if len(
+                        filter(lambda data_layer: data_layer.name == layer.name,
+                               data_layers)) == 0:
+                    data_layers.append(layer)
+            for parent_layer in layer.__parent_layers__.values():
+                find_data_layer(parent_layer)
+
+        for layer in self.layers:
+            find_data_layer(layer)
+
+        return data_layers
+
+    def get_layer_proto(self, name):
         """
         get layer by layer name
         :param name:
         :return:
        """
-        pass
+        layers = filter(lambda layer: layer.name == name,
+                        self.__model_config__.layers)
+        if len(layers) == 1:
+            return layers[0]
+        else:
+            return None
 
     def data_type(self):
         """
+        get data_type from proto, such as:
+        [('image', dense_vector(768)), ('label', integer_value(10))]
+        the order is the same as __model_config__.input_layer_names
         """
-        pass
+        data_types_lists = []
+        for layer_name in self.__model_config__.input_layer_names:
+            data_types_lists.append(
+                (layer_name, self.get_layer(layer_name).type))
+
+        return data_types_lists
+
+
+if __name__ == '__main__':
+    pixel = v2_layer.data(name='pixel', type=data_type.dense_vector(784))
+    label = v2_layer.data(name='label', type=data_type.integer_value(10))
+    hidden = v2_layer.fc(input=pixel,
+                         size=100,
+                         act=conf_helps.SigmoidActivation())
+    inference = v2_layer.fc(input=hidden,
+                            size=10,
+                            act=conf_helps.SoftmaxActivation())
+    maxid = v2_layer.max_id(input=inference)
+    cost1 = v2_layer.classification_cost(input=inference, label=label)
+    cost2 = v2_layer.cross_entropy_cost(input=inference, label=label)
+
+    print Topology(cost1).proto()
+    print Topology(cost2).proto()
+    print Topology(cost1, cost2).proto()
+    print Topology(cost2).proto()
+    print Topology(inference, maxid).proto()
diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py
index 4365bd41e7..c8da6e70cf 100644
--- a/python/paddle/v2/trainer.py
+++ b/python/paddle/v2/trainer.py
@@ -1,13 +1,12 @@
 import collections
 
 import py_paddle.swig_paddle as api
-from paddle.proto.ModelConfig_pb2 import ModelConfig
 from py_paddle import DataProviderConverter
 
 from . import event as v2_event
-from . import layer as v2_layer
 from . import optimizer as v2_optimizer
 from . import parameters as v2_parameters
+from . import topology as v2_topology
 
 __all__ = ['ITrainer', 'SGD']
 
@@ -88,12 +87,11 @@ class SGD(ITrainer):
         if event_handler is None:
             event_handler = default_event_handler
 
-        topology = v2_layer.parse_network(topology)
-
         __check_train_args__(**locals())
 
         gm = api.GradientMachine.createFromConfigProto(
-            topology, api.CREATE_MODE_NORMAL, self.__optimizer__.enable_types())
+            topology.proto(), api.CREATE_MODE_NORMAL,
+            self.__optimizer__.enable_types())
         assert isinstance(gm, api.GradientMachine)
         parameters.append_gradient_machine(gm)
 
@@ -102,13 +100,7 @@ class SGD(ITrainer):
         gm.start()
 
         out_args = api.Arguments.createArguments(0)
-
-        data_types_lists = []
-        for each in topology.input_layer_names:
-            if each not in data_types:
-                raise ValueError()
-            data_types_lists.append(data_types[each])
-
+        data_types_lists = [data_type[1] for data_type in topology.data_type()]
         converter = DataProviderConverter(input_types=data_types_lists)
 
         for pass_id in xrange(num_passes):
@@ -141,7 +133,7 @@ def __data_reader_to_batch__(reader, batch_size, topology):
     def input_reorder(func):
         for item in func():
             retv = []
-            for __layer_name__ in topology.input_layer_names:
+            for __layer_name__ in topology.proto().input_layer_names:
                 retv.append(item[__layer_name__])
             yield retv
 
@@ -178,7 +170,7 @@ def __check_train_args__(train_data_reader, topology, parameters,
         raise ValueError('test_data_reader should be a function, which can '
                          'return a iterator')
 
-    if not isinstance(topology, ModelConfig):
+    if not isinstance(topology, v2_topology.Topology):
         raise ValueError('topology should be a model config')
 
     if not isinstance(parameters, v2_parameters.Parameters):
-- 
GitLab
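
A minimal sketch of how the pieces introduced in this patch fit together, assuming
paddle.v2 is imported as `paddle` the way demo/mnist/api_train_v2.py does. The layer
sizes and the printed output in the comments below are illustrative only and are not
part of the change itself.

    import paddle.v2 as paddle

    paddle.init(use_gpu=False, trainer_count=1)

    # Network definition is unchanged; only the wrapping around it differs.
    images = paddle.layer.data(
        name='pixel', type=paddle.data_type.dense_vector(784))
    label = paddle.layer.data(
        name='label', type=paddle.data_type.integer_value(10))
    inference = paddle.layer.fc(
        input=images, size=10, act=paddle.activation.Softmax())
    cost = paddle.layer.classification_cost(input=inference, label=label)

    # The cost layer is wrapped in a Topology, which owns the parsed ModelConfig.
    topology = paddle.topology.Topology(cost)

    # Parameters are now created from the Topology instead of from raw layers.
    parameters = paddle.parameters.create(topology)

    # data_type() replaces the old data_types={...} argument of trainer.train();
    # the trainer now reads the input types from the topology itself.
    print topology.data_type()  # e.g. [('pixel', dense_vector(784)), ('label', integer_value(10))]

    # Layer lookups used internally by the trainer are also available:
    pixel_layer = topology.get_layer('pixel')  # v2 Layer instance by name
    data_layers = topology.get_data_layer()    # all DataLayerV2 instances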