diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt
index 9898dc083ebb1783a0e2ddd12afaa9c3d5a79e98..1dc1383dd2e0b3111732fe9bfb14d46a547dcc02 100644
--- a/paddle/CMakeLists.txt
+++ b/paddle/CMakeLists.txt
@@ -8,6 +8,7 @@ add_subdirectory(gserver)
 add_subdirectory(pserver)
 add_subdirectory(trainer)
 add_subdirectory(scripts)
+add_subdirectory(optimizer)
 
 # Do not build go directory until go cmake is working smoothly.
 # if(CMAKE_Go_COMPILER)
diff --git a/proto/CMakeLists.txt b/proto/CMakeLists.txt
index 62d5b9e38b21ee82d1e78c3bde5aa5df7e4a33ee..9b98dd3fde4d141a35d93c0981acb287831c3eaf 100644
--- a/proto/CMakeLists.txt
+++ b/proto/CMakeLists.txt
@@ -5,6 +5,7 @@ set(proto_filenames
     ParameterConfig.proto
     ParameterService.proto
     TrainerConfig.proto
+    OptimizerConfig.proto
     ParameterServerConfig.proto)
 
 set(PROTO_GEN)
diff --git a/proto/OptimizerConfig.proto b/proto/OptimizerConfig.proto
new file mode 100644
index 0000000000000000000000000000000000000000..c1080f4e168a363ca1d637834629206a3513a5bc
--- /dev/null
+++ b/proto/OptimizerConfig.proto
@@ -0,0 +1,99 @@
+syntax = "proto2";
+
+option optimize_for = LITE_RUNTIME;
+
+package paddle;
+
+message SGDConfig {
+  // SGD
+  // momentum: float >= 0. Parameter updates momentum.
+  // decay: float >= 0. Learning rate decay over each update.
+  // nesterov: boolean. Whether to apply Nesterov momentum.
+  optional double momentum = 21 [default = 0.0];
+  optional double decay = 23 [default = 0.0];
+  optional bool nesterov = 24 [default = false];
+}
+
+message AdadeltaConfig {
+  // Adadelta
+  // It is recommended to leave the parameters at their default values.
+  // rho: float >= 0.
+  // epsilon: float >= 0. Fuzz factor.
+  // decay: float >= 0. Learning rate decay over each update.
+
+  // reference: [Adadelta - an adaptive learning rate method](http://arxiv.org/abs/1212.5701)
+  optional double rho = 33 [default = 0.90];
+  optional double epsilon = 31 [default = 1e-5];
+  optional double decay = 32 [default = 0.0];
+
+}
+
+message AdagradConfig {
+  // Adagrad
+  // epsilon: float >= 0.
+  // decay: float >= 0. Learning rate decay over each update.
+
+  // reference: [Adaptive Subgradient Methods for Online Learning and Stochastic Optimization](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
+  optional double epsilon = 41 [default = 1e-5];
+  optional double decay = 42 [default = 0.0];
+}
+
+message AdamConfig {
+  // Adam
+  // beta_1: float, 0 < beta < 1. Generally close to 1.
+  // beta_2: float, 0 < beta < 1. Generally close to 1.
+  // epsilon: float >= 0. Fuzz factor.
+  // decay: float >= 0. Learning rate decay over each update.
+
+  // reference: [Adam - A Method for Stochastic Optimization](http://arxiv.org/abs/1412.6980v8)
+  optional double beta_1 = 41;
+  optional double beta_2 = 42;
+  optional double epsilon = 43;
+  optional double decay = 44;
+}
+
+message LearningRateConfig {
+  // learning rate policy
+  required double learning_rate = 40 [default = 1.0];
+  optional double lr_decay_a = 25;
+  optional double lr_decay_b = 26;
+}
+
+
+message OptimizerConfig {
+  // common config of optimizer
+  required string optimizer_name = 1;
+  // algorithm config
+  enum OptimizerType {
+    SGD = 1;
+    Adadelta = 2;
+    Adagrad = 3;
+    Adam = 4;
+  }
+  required OptimizerType optimizer_type = 2;
+  optional SGDConfig sgd = 3;
+  optional AdadeltaConfig adadelta = 4;
+  optional AdagradConfig adagrad = 5;
+  optional AdamConfig adam = 6;
+
+  // learning rate runtime policy config
+  // lr_policy: string
+  //   ConstLr = 0;
+  //   LinearLr = 1;
+  required string lr_policy = 11;
+  required LearningRateConfig lr_config = 12;
+  optional uint64 num_sample_passed = 13 [default = 0];
+
+  // regularizer config
+  enum RegularizerType {
+    L1 = 1;
+    L2 = 2;
+    L1L2 = 3;
+  }
+  optional RegularizerType regularizer_type = 21;
+
+  // gradient clipping config
+  optional double clipnorm = 101;
+  optional double clipvalue = 102;
+
+}
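For reference, a minimal sketch of how a caller might populate and serialize this OptimizerConfig using the C++ classes that protoc generates from the file above. Accessor names follow standard protobuf codegen; the surrounding Paddle optimizer code is not shown, and the concrete values (name, momentum, learning rate) are only illustrative.

#include <string>
#include "OptimizerConfig.pb.h"  // generated by protoc from proto/OptimizerConfig.proto

int main() {
  paddle::OptimizerConfig config;

  // common config (both fields are required)
  config.set_optimizer_name("sgd_w0");  // illustrative name
  config.set_optimizer_type(paddle::OptimizerConfig::SGD);

  // algorithm config: fill in only the sub-message matching optimizer_type
  config.mutable_sgd()->set_momentum(0.9);
  config.mutable_sgd()->set_decay(1e-4);
  config.mutable_sgd()->set_nesterov(false);

  // learning rate runtime policy config
  config.set_lr_policy("ConstLr");
  config.mutable_lr_config()->set_learning_rate(0.01);

  // serialize; SerializeToString is available even with LITE_RUNTIME
  std::string bytes;
  config.SerializeToString(&bytes);
  return 0;
}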