From ffc341675dd8c8057a105fb8ebfe5a91b43ce6ca Mon Sep 17 00:00:00 2001
From: luotao1
Date: Tue, 27 Sep 2016 13:20:49 +0800
Subject: [PATCH] Add parallel_nn api and unittest (#110)

* Add `device` parameter to ExtraAttr in trainer_config_helpers.
* add unittest for it.
---
 .../tests/sample_trainer_config_parallel.conf | 151 +++++-------------
 python/paddle/trainer_config_helpers/attrs.py |  13 +-
 .../paddle/trainer_config_helpers/layers.py   |  14 +-
 3 files changed, 65 insertions(+), 113 deletions(-)
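Note on the API this patch exercises: with the trainer's parallel_nn option, each
layer can be pinned to a device through the new `device` field of ExtraAttr, where
device=-1 selects the CPU and device>=0 selects the GPU with that ID. A minimal
sketch of such a config follows; the layer names, sizes, and the particular
CPU/GPU split are illustrative only, not part of the patch:

    from paddle.trainer_config_helpers import *

    settings(batch_size=100)

    # Illustrative two-device pipeline: hidden layer on CPU, output on GPU 0.
    feats = data_layer(name='features', size=3)
    hidden = fc_layer(input=feats, size=5,
                      act=SigmoidActivation(),
                      layer_attr=ExtraAttr(device=-1))  # pinned to CPU
    pred = fc_layer(input=hidden, size=10,
                    act=SoftmaxActivation(),
                    layer_attr=ExtraAttr(device=0))     # pinned to GPU 0
    outputs(pred)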
diff --git a/paddle/trainer/tests/sample_trainer_config_parallel.conf b/paddle/trainer/tests/sample_trainer_config_parallel.conf
index 3563fede1c..e35a1f26da 100644
--- a/paddle/trainer/tests/sample_trainer_config_parallel.conf
+++ b/paddle/trainer/tests/sample_trainer_config_parallel.conf
@@ -13,137 +13,74 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later.
+from paddle.trainer_config_helpers import *

-TrainData(
-    SimpleData(
-        files = "trainer/tests/sample_filelist.txt",
-        feat_dim = 3,
-        context_len = 0,
-        buffer_capacity = 1000000,
-    )
-)
+TrainData(SimpleData(
+            files = "trainer/tests/sample_filelist.txt",
+            feat_dim = 3,
+            context_len = 0,
+            buffer_capacity = 1000000))

-TestData(
-    SimpleData(
-        files = "trainer/tests/sample_filelist.txt",
-        feat_dim = 3,
-        context_len = 0,
-        buffer_capacity = 1000000,
-    )
-)
+TestData(SimpleData(
+            files = "trainer/tests/sample_filelist.txt",
+            feat_dim = 3,
+            context_len = 0,
+            buffer_capacity = 1000000))

-Settings(
-    algorithm = "sgd",
-    num_batches_per_send_parameter = 1,
-    num_batches_per_get_parameter = 1,
-    batch_size = 100,
-    learning_rate = 0.001,
-    learning_rate_decay_a = 1e-5,
-    learning_rate_decay_b = 0.5,
-)
+settings(batch_size = 100)

-default_initial_std(0.2)
 # Output layer, label layer, cost layer, preferably set to the same environment.
 output_device = 0

-model_type("nn")
-
 # Input Layer does not need to specify the device number.
-Layer(
-    name = "input",
-    type = "data",
-    size = 3,
-)
+data = data_layer(name='input', size=3)

 # Calculate in the CPU.
-Layer(
-    name = "layer1_1",
-    type = "fc",
-    size = 5,
-    active_type = "sigmoid",
-    device = -1,
-    inputs = "input",
-)
+fc1 = fc_layer(input=data, size=5,
+               bias_attr=True,
+               layer_attr=ExtraAttr(device=-1),
+               act=SigmoidActivation())

 # Calculate in the GPU 0.
-Layer(
-    name = "layer2_1",
-    type = "fc",
-    size = 10,
-    active_type = "sigmoid",
-    device = 0,
-    inputs = "layer1_1",
-)
+fc2 = fc_layer(input=fc1, size=10,
+               bias_attr=True,
+               layer_attr=ExtraAttr(device=0),
+               act=SigmoidActivation())

 # Calculate in the GPU 1.
-Layer(
-    name = "layer2_2",
-    type = "fc",
-    size = 10,
-    active_type = "sigmoid",
-    device = 1,
-    inputs = "layer1_1",
-)
+fc3 = fc_layer(input=fc1, size=10,
+               bias_attr=True,
+               layer_attr=ExtraAttr(device=1),
+               act=SigmoidActivation())

 # Calculate in the GPU 0.
-Layer(
-    name = "layer3_1",
-    type = "fc",
-    size = 10,
-    device = 0,
-    active_type = "sigmoid",
-    inputs = ["layer2_1", "layer2_2"],
-)
+fc4 = fc_layer(input=[fc2,fc3], size=10,
+               bias_attr=True,
+               layer_attr=ExtraAttr(device=0),
+               act=SigmoidActivation())

 # Calculate in the GPU 1.
-Layer(
-    name = "layer3_2",
-    type = "fc",
-    size = 10,
-    device = 1,
-    active_type = "sigmoid",
-    inputs = ["layer2_1", "layer2_2"],
-)
-
+fc5 = fc_layer(input=[fc2,fc3], size=10,
+               bias_attr=True,
+               layer_attr=ExtraAttr(device=1),
+               act=SigmoidActivation())

-Layer(
-    name = "output",
-    type = "fc",
-    size = 10,
-    device = output_device,
-    active_type = "sigmoid",
-    inputs = ["layer3_1", "layer3_2"],
-)
+output = fc_layer(input=[fc4,fc5], size=10,
+                  bias_attr=True,
+                  layer_attr=ExtraAttr(device=output_device),
+                  act=SoftmaxActivation())

 if get_config_arg('with_cost', bool, True):
     # This is for training the neural network.
     # We need to have another data layer for label
     # and a layer for calculating cost
-    Layer(
-        name = "label",
-        type = "data",
-        device = output_device,
-        size = 1,
-    )
-
-    Layer(
-        name = "cost",
-        type = "multi-class-cross-entropy",
-        device = output_device,
-        inputs = ["output", "label"],
-    )
-
-    Evaluator(
-        name = "error",
-        type = "classification_error",
-        inputs = ["output", "label"])
-
-    Inputs("input", "label")
-    Outputs("cost")
-
+    lbl = data_layer(name='label', size=1,
+                     layer_attr=ExtraAttr(device=output_device))
+
+    outputs(classification_cost(input=output,
+                                label=lbl,
+                                layer_attr=ExtraAttr(device=output_device)))
 else:
     # This is for prediction where we don't have label
     # and don't need to calculate cost
-    Inputs("input")
-    Outputs("output")
+    outputs(output)
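The plumbing that makes the config above work lives in the two helper files
changed below: ExtraLayerAttribute stores `device` in its attr dict, and the
existing ExtraLayerAttribute.to_kwargs (which the layers.py hunk calls) expands
that dict into keyword arguments of the underlying Layer() call. A simplified,
self-contained sketch of that flow, using a stand-in class rather than the real
one:

    # Stand-in for ExtraLayerAttribute, reduced to the device attribute only.
    class ExtraAttrSketch(object):
        def __init__(self, device=None):
            self.attr = dict()
            if isinstance(device, int):
                self.attr["device"] = device

        @staticmethod
        def to_kwargs(attr):
            # None-safe expansion, mirroring ExtraLayerAttribute.to_kwargs.
            return dict() if attr is None else attr.attr

    def make_layer(name, **kwargs):
        # Stand-in for the low-level Layer() call.
        print(name, kwargs)

    make_layer("cost", **ExtraAttrSketch.to_kwargs(ExtraAttrSketch(device=0)))
    # -> cost {'device': 0}
    make_layer("cost", **ExtraAttrSketch.to_kwargs(None))
    # -> cost {}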
diff --git a/python/paddle/trainer_config_helpers/attrs.py b/python/paddle/trainer_config_helpers/attrs.py
index 2b5b451edd..d263441247 100644
--- a/python/paddle/trainer_config_helpers/attrs.py
+++ b/python/paddle/trainer_config_helpers/attrs.py
@@ -174,12 +174,16 @@ class ExtraLayerAttribute(object):
                       The dropout rate is the zero rate of this mask. The
                       details of what dropout is please refer to `here
-                      <https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf>`_
+                      <https://www.cs.toronto.edu/~hinton/absps/
+                      JMLRdropout.pdf>`_.
     :type drop_rate: float
-
+    :param device: device ID of the layer. device=-1 means use CPU; device>=0
+                   means use the GPU with that ID. For the details of device
+                   allocation in parallel_nn, please refer to `here `_.
+    :type device: int
     """

-    def __init__(self, error_clipping_threshold=None, drop_rate=None):
+    def __init__(self, error_clipping_threshold=None, drop_rate=None, device=None):
         self.attr = dict()
         if isinstance(error_clipping_threshold, float):
             assert error_clipping_threshold > 0
@@ -189,6 +193,9 @@ class ExtraLayerAttribute(object):
             assert drop_rate > 0
             self.attr["drop_rate"] = drop_rate

+        if isinstance(device, int):
+            self.attr["device"] = device
+
     def check(self, layer_name):
         for key in self.attr:
             if not hasattr(self, 'can_%s' % key) or \
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 8b7cabf2fa..76b0db546b 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -201,6 +201,7 @@ class LayerOutput(object):

 ERROR_CLIPPING = 'error_clipping_threshold'
 DROPOUT = 'drop_rate'
+DEVICE = 'device'


 def check_input(input):
@@ -223,10 +224,12 @@ def check_input(input):


 def layer_support(*attrs):
+    attrs_list = list(attrs)
+    attrs_list.append(DEVICE)
     def decorator(method):
         @functools.wraps(method)
         def wrapper(*args, **kwargs):
-            for attr in attrs:
+            for attr in attrs_list:
                 for each in args:
                     if isinstance(each, ExtraLayerAttribute):
                         setattr(each, '_'.join(['can', attr]), True)
@@ -2625,9 +2628,11 @@ def regression_cost(input, label, cost='square_error', name=None):


 @wrap_name_default("cost")
+@layer_support()
 def classification_cost(input, label, name=None,
                         cost="multi-class-cross-entropy",
-                        evaluator=classification_error_evaluator):
+                        evaluator=classification_error_evaluator,
+                        layer_attr=None):
     """
     classification cost Layer.
@@ -2640,13 +2645,16 @@ def classification_cost(input, label, name=None,
     :param cost: cost method.
     :type cost: basestring
     :param evaluator: Evaluator method.
+    :param layer_attr: layer's extra attribute.
+    :type layer_attr: ExtraLayerAttribute
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
     assert input.layer_type != LayerType.DATA
     assert isinstance(input.activation, SoftmaxActivation)
     assert label.layer_type == LayerType.DATA
-    Layer(name=name, type=cost, inputs=[Input(input.name), Input(label.name)])
+    Layer(name=name, type=cost, inputs=[Input(input.name), Input(label.name)],
+          **ExtraLayerAttribute.to_kwargs(layer_attr))

     def __add_evaluator__(e):
         assert callable(e)
-- 
GitLab
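A closing note on why the small layer_support change above is enough to expose
`device` everywhere: appending DEVICE to each decorated wrapper's supported-attribute
list marks every ExtraLayerAttribute argument as can_device, so the later check()
pass accepts the attribute on any layer without each wrapper opting in. A condensed
sketch of that decorator pattern (simplified: the real wrapper only inspects
ExtraLayerAttribute instances among the positional args, and the demo_layer and
Attr names below are hypothetical):

    import functools

    DEVICE = 'device'

    def layer_support(*attrs):
        # Every decorated layer implicitly supports the device attribute.
        attrs_list = list(attrs)
        attrs_list.append(DEVICE)

        def decorator(method):
            @functools.wraps(method)
            def wrapper(*args, **kwargs):
                for attr in attrs_list:
                    for each in list(args) + list(kwargs.values()):
                        if hasattr(each, 'attr'):  # stand-in for the isinstance check
                            setattr(each, '_'.join(['can', attr]), True)
                return method(*args, **kwargs)
            return wrapper
        return decorator

    @layer_support()
    def demo_layer(layer_attr=None):
        return layer_attr

    class Attr(object):
        attr = {}

    a = demo_layer(layer_attr=Attr())
    print(a.can_device)  # True: DEVICE was auto-added to the supported list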