From 5906baa3f4ad9c595f5d31e35059a693c0637e0c Mon Sep 17 00:00:00 2001
From: Abhinav Arora <abhinavarora28@gmail.com>
Date: Fri, 27 Oct 2017 19:28:28 -0700
Subject: [PATCH] Adding L2 Regularization to Recognize digits MLP example (#5186)

---
 python/paddle/v2/framework/layer_helper.py    | 10 ++++----
 .../tests/test_recognize_digits_mlp.py        | 23 +++++++++++++++----
 2 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/python/paddle/v2/framework/layer_helper.py b/python/paddle/v2/framework/layer_helper.py
index 6142b1f93c3..1f72c9bc7b0 100644
--- a/python/paddle/v2/framework/layer_helper.py
+++ b/python/paddle/v2/framework/layer_helper.py
@@ -131,12 +131,14 @@ class LayerHelper(object):
         return dtype
 
     def create_parameter(self, attr, shape, dtype, suffix='w'):
-        if attr['name'] is None:
-            attr['name'] = unique_name(".".join([self.name, suffix]))
+        # Deepcopy the attr so that parameters can be shared in program
+        attr_copy = copy.deepcopy(attr)
+        if attr_copy['name'] is None:
+            attr_copy['name'] = unique_name(".".join([self.name, suffix]))
         self.init_program.global_block().create_parameter(
-            dtype=dtype, shape=shape, **attr)
+            dtype=dtype, shape=shape, **attr_copy)
         return self.program.global_block().create_parameter(
-            name=attr['name'], dtype=dtype, shape=shape)
+            name=attr_copy['name'], dtype=dtype, shape=shape)
 
     def create_tmp_variable(self, dtype):
         return self.program.current_block().create_var(
diff --git a/python/paddle/v2/framework/tests/test_recognize_digits_mlp.py b/python/paddle/v2/framework/tests/test_recognize_digits_mlp.py
index a985d1f3d38..44a768d5e22 100644
--- a/python/paddle/v2/framework/tests/test_recognize_digits_mlp.py
+++ b/python/paddle/v2/framework/tests/test_recognize_digits_mlp.py
@@ -5,9 +5,11 @@ import paddle.v2.framework.optimizer as optimizer
 
 from paddle.v2.framework.framework import Program, g_program
 from paddle.v2.framework.executor import Executor
+from paddle.v2.framework.regularizer import L2DecayRegularizer
 
 import numpy as np
 
+BATCH_SIZE = 128
 init_program = Program()
 program = Program()
 image = layers.data(
@@ -17,22 +19,35 @@ image = layers.data(
     program=program,
     init_program=init_program)
 
+param_attr = {
+    'name': None,
+    'init_attr': {
+        'type': 'uniform_random',
+        'min': -1.0,
+        'max': 1.0
+    },
+    'regularization': L2DecayRegularizer(0.0005 * BATCH_SIZE)
+}
+
 hidden1 = layers.fc(input=image,
                     size=128,
                     act='relu',
                     program=program,
-                    init_program=init_program)
+                    init_program=init_program,
+                    param_attr=param_attr)
 hidden2 = layers.fc(input=hidden1,
                     size=64,
                     act='relu',
                     program=program,
-                    init_program=init_program)
+                    init_program=init_program,
+                    param_attr=param_attr)
 
 predict = layers.fc(input=hidden2,
                     size=10,
                     act='softmax',
                     program=program,
-                    init_program=init_program)
+                    init_program=init_program,
+                    param_attr=param_attr)
 
 label = layers.data(
     name='y',
@@ -48,8 +63,6 @@ avg_cost = layers.mean(x=cost, program=program, init_program=init_program)
 sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
 opts = sgd_optimizer.minimize(avg_cost)
 
-BATCH_SIZE = 128
-
 train_reader = paddle.batch(
     paddle.reader.shuffle(
         paddle.dataset.mnist.train(), buf_size=8192),
-- 
GitLab