提交 1fc43527 编写于 作者: L luotao1 提交者: Yu Yang

refine sparse momentum api and unittest (#126)

* refine sparse momentum api and unittest
* fix unittests bug
上级 6decbdf4
...@@ -142,6 +142,7 @@ We also project the encoder vector to :code:`decoder_size` dimensional space, ge ...@@ -142,6 +142,7 @@ We also project the encoder vector to :code:`decoder_size` dimensional space, ge
The decoder uses :code:`recurrent_group` to define the recurrent neural network. The step and output functions are defined in :code:`gru_decoder_with_attention`: The decoder uses :code:`recurrent_group` to define the recurrent neural network. The step and output functions are defined in :code:`gru_decoder_with_attention`:
.. code-block:: python .. code-block:: python
group_inputs=[StaticInput(input=encoded_vector,is_seq=True), group_inputs=[StaticInput(input=encoded_vector,is_seq=True),
StaticInput(input=encoded_proj,is_seq=True)] StaticInput(input=encoded_proj,is_seq=True)]
trg_embedding = embedding_layer( trg_embedding = embedding_layer(
......
...@@ -4,6 +4,12 @@ BaseSGDOptimizer ...@@ -4,6 +4,12 @@ BaseSGDOptimizer
:members: BaseSGDOptimizer :members: BaseSGDOptimizer
:noindex: :noindex:
MomentumOptimizer
=================
.. automodule:: paddle.trainer_config_helpers.optimizers
:members: MomentumOptimizer
:noindex:
AdamOptimizer AdamOptimizer
============= =============
.. automodule:: paddle.trainer_config_helpers.optimizers .. automodule:: paddle.trainer_config_helpers.optimizers
......
...@@ -47,7 +47,7 @@ add_test(NAME test_CompareTwoOpts ...@@ -47,7 +47,7 @@ add_test(NAME test_CompareTwoOpts
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoOpts ${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoOpts
--config_file_a=trainer/tests/sample_trainer_config_opt_a.conf --config_file_b=trainer/tests/sample_trainer_config_opt_b.conf --config_file_a=trainer/tests/sample_trainer_config_opt_a.conf --config_file_b=trainer/tests/sample_trainer_config_opt_b.conf
--num_passes=1 --need_high_accuracy=1 --num_passes=1 --need_high_accuracy=0
WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) WORKING_DIRECTORY ${PROJ_ROOT}/paddle/)
################# test_CompareSparse ################## ################# test_CompareSparse ##################
......
trainer/tests/mnist_bin_part
...@@ -12,32 +12,29 @@ ...@@ -12,32 +12,29 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later. from paddle.trainer_config_helpers import *
################################### Data Configuration ################################### ################################### Data Configuration ###################################
TrainData(ProtoData(files = "train.list")) TrainData(ProtoData(files = "trainer/tests/mnist.list"))
################################### Algorithm Configuration ################################### ################################### Algorithm Configuration ###################################
Settings( settings(batch_size = 1000,
learning_rate_decay_a = 0.0, learning_method = MomentumOptimizer(momentum=0.5, sparse=False))
learning_rate_decay_b = 0.0,
learning_rate = 1e-03,
batch_size = 1000,
algorithm = 'sgd',
num_batches_per_send_parameter = 1,
num_batches_per_get_parameter = 1,
learning_method='sparse_momentum',
)
default_momentum(0.5)
################################### Network Configuration ################################### ################################### Network Configuration ###################################
Layer(type = "data", name = "input", size = 784) data = data_layer(name ="input", size=784)
Layer(inputs = [Input("input", parameter_name = "_layer1.w")], name = "layer1", bias = Bias(parameter_name = "_layer1.bias"), active_type = "sigmoid", type = "fc", size = 800)
Layer(inputs = [Input("layer1", parameter_name = "_layer2.w")], name = "layer2", bias = Bias(parameter_name = "_layer2.bias"), active_type = "sigmoid", type = "fc", size = 800) fc1 = fc_layer(input=data, size=800,
#Layer(inputs = [Input("layer2", parameter_name = "_layer_output.w", decay_rate = 0.02)], name = "output", bias = Bias(parameter_name = "_layer_output.bias"), active_type = "margin", type = "fc", size = 10) bias_attr=True,
#Layer(inputs = [Input("layer2", parameter_name = "_layer_output.w", decay_rate = 0.02)], name = "output", bias = Bias(parameter_name = "_layer_output.bias"), type = "fc", size = 10) act=SigmoidActivation())
Layer(inputs = [Input("layer2", parameter_name = "_layer_output.w")], name = "output", bias = Bias(parameter_name = "_layer_output.bias"), active_type = "softmax", type = "fc", size = 10)
Layer(type = "data", name = "label", size = 1) fc2 = fc_layer(input=fc1, size=800,
Layer(inputs = [Input("output"), Input("label")], type = "multi-class-cross-entropy", name = "cost") bias_attr=True,
#Layer(inputs = [Input("output"), Input("label")], type = "huber", name = "cost") act=SigmoidActivation())
Evaluator(inputs=["output", "label"], type = "classification_error", name = "classification_error")
Inputs("input", "label") output = fc_layer(input=[fc1, fc2], size=10,
Outputs("cost") bias_attr=True,
act=SoftmaxActivation())
lbl = data_layer(name ="label", size=1)
cost = classification_cost(input=output, label=lbl)
outputs(cost)
...@@ -12,32 +12,29 @@ ...@@ -12,32 +12,29 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later. from paddle.trainer_config_helpers import *
################################### Data Configuration ################################### ################################### Data Configuration ###################################
TrainData(ProtoData(files = "train.list")) TrainData(ProtoData(files = "trainer/tests/mnist.list"))
################################### Algorithm Configuration ################################### ################################### Algorithm Configuration ###################################
Settings( settings(batch_size = 1000,
learning_rate_decay_a = 0.0, learning_method = MomentumOptimizer(momentum=0.5, sparse=False))
learning_rate_decay_b = 0.0,
learning_rate = 1e-03,
batch_size = 1000,
algorithm = 'sgd',
num_batches_per_send_parameter = 1,
num_batches_per_get_parameter = 1,
learning_method='momentum',
)
default_momentum(0.5)
################################### Network Configuration ################################### ################################### Network Configuration ###################################
Layer(type = "data", name = "input", size = 784) data = data_layer(name ="input", size=784)
Layer(inputs = [Input("input", parameter_name = "_layer1.w")], name = "layer1", bias = Bias(parameter_name = "_layer1.bias"), active_type = "sigmoid", type = "fc", size = 800)
Layer(inputs = [Input("layer1", parameter_name = "_layer2.w")], name = "layer2", bias = Bias(parameter_name = "_layer2.bias"), active_type = "sigmoid", type = "fc", size = 800) fc1 = fc_layer(input=data, size=800,
#Layer(inputs = [Input("layer2", parameter_name = "_layer_output.w", decay_rate = 0.02)], name = "output", bias = Bias(parameter_name = "_layer_output.bias"), active_type = "margin", type = "fc", size = 10) bias_attr=True,
#Layer(inputs = [Input("layer2", parameter_name = "_layer_output.w", decay_rate = 0.02)], name = "output", bias = Bias(parameter_name = "_layer_output.bias"), type = "fc", size = 10) act=SigmoidActivation())
Layer(inputs = [Input("layer2", parameter_name = "_layer_output.w")], name = "output", bias = Bias(parameter_name = "_layer_output.bias"), active_type = "softmax", type = "fc", size = 10)
Layer(type = "data", name = "label", size = 1) fc2 = fc_layer(input=fc1, size=800,
Layer(inputs = [Input("output"), Input("label")], type = "multi-class-cross-entropy", name = "cost") bias_attr=True,
#Layer(inputs = [Input("output"), Input("label")], type = "huber", name = "cost") act=SigmoidActivation())
Evaluator(inputs=["output", "label"], type = "classification_error", name = "classification_error")
Inputs("input", "label") output = fc_layer(input=[fc1, fc2], size=10,
Outputs("cost") bias_attr=True,
act=SoftmaxActivation())
lbl = data_layer(name ="label", size=1)
cost = classification_cost(input=output, label=lbl)
outputs(cost)
...@@ -71,16 +71,41 @@ class BaseSGDOptimizer(Optimizer): ...@@ -71,16 +71,41 @@ class BaseSGDOptimizer(Optimizer):
class MomentumOptimizer(BaseSGDOptimizer): class MomentumOptimizer(BaseSGDOptimizer):
"""
MomentumOptimizer.
When sparse=True, the update scheme:
.. math::
\\alpha_t &= \\alpha_{t-1} / k \\\\
\\beta_t &= \\beta_{t-1} / (1 + \\lambda \\gamma_t) \\\\
u_t &= u_{t-1} - \\alpha_t \\gamma_t g_t \\\\
v_t &= v_{t-1} + \\tau_{t-1} \\alpha_t \\gamma_t g_t \\\\
\\tau_t &= \\tau_{t-1} + \\beta_t / \\alpha_t
where :math:`k` is momentum, :math:`\\lambda` is decay rate,
:math:`\\gamma_t` is learning rate at the t'th step.
:param sparse: with sparse support or not.
:type sparse: bool
"""
def extra_settings(self): def extra_settings(self):
default_momentum(self.momentum) default_momentum(self.momentum)
def to_setting_kwargs(self): def to_setting_kwargs(self):
return { if self.sparse:
'learning_method': 'momentum' return {
} 'learning_method': 'sparse_momentum'
}
else:
return {
'learning_method': 'momentum'
}
def __init__(self, momentum=None): def __init__(self, momentum=None, sparse=False):
self.momentum = momentum self.momentum = momentum
self.sparse = sparse
class AdamOptimizer(BaseSGDOptimizer): class AdamOptimizer(BaseSGDOptimizer):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册