//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";

// Generate code against the lite runtime; descriptor/reflection features are
// not available to users of these messages.
option optimize_for = LITE_RUNTIME;

package paddle;

// Hyper-parameters of the SGD optimizer.
message SGDConfig {
  // Momentum applied to parameter updates; float >= 0.
  optional double momentum = 21 [ default = 0.0 ];
  // Learning rate decay over each update; float >= 0.
  optional double decay = 23 [ default = 0.0 ];
  // Whether to apply Nesterov momentum.
  optional bool nesterov = 24 [ default = false ];
}

// Hyper-parameters of the Adadelta optimizer.
// It is recommended to leave the parameters at their default values.
//
// Reference: [Adadelta - an adaptive learning rate
// method](http://arxiv.org/abs/1212.5701)
message AdadeltaConfig {
  // rho: float >= 0.
  optional double rho = 33 [ default = 0.90 ];
  // Fuzz factor; float >= 0.
  optional double epsilon = 31 [ default = 1e-5 ];
  // Learning rate decay over each update; float >= 0.
  optional double decay = 32 [ default = 0.0 ];
}

// Hyper-parameters of the Adagrad optimizer.
//
// Reference: [Adaptive Subgradient Methods for Online Learning and Stochastic
// Optimization](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
message AdagradConfig {
  // epsilon: float >= 0.
  optional double epsilon = 41 [ default = 1e-5 ];
  // Learning rate decay over each update; float >= 0.
  optional double decay = 42 [ default = 0.0 ];
}

// Hyper-parameters of the Adam optimizer.
// Unlike the other optimizer configs in this file, these fields declare no
// explicit default values.
//
// Reference: [Adam - A Method for Stochastic
// Optimization](http://arxiv.org/abs/1412.6980v8)
message AdamConfig {
  // float, 0 < beta < 1. Generally close to 1.
  optional double beta_1 = 41;
  // float, 0 < beta < 1. Generally close to 1.
  optional double beta_2 = 42;
  // Fuzz factor; float >= 0.
  optional double epsilon = 43;
  // Learning rate decay over each update; float >= 0.
  optional double decay = 44;
}

// Configuration of the constant learning rate policy.
message ConstLrConfig {
  // Learning rate value; defaults to 1.0.
  optional double learning_rate = 1 [ default = 1.0 ];
}

// Configuration of the linear learning rate policy.
message LinearLrConfig {
  // Learning rate value; defaults to 1.0.
  optional double learning_rate = 1 [ default = 1.0 ];
  // Decay coefficients of the policy. NOTE(review): the exact formula using
  // lr_decay_a / lr_decay_b lives in the optimizer implementation — confirm
  // there before relying on their meaning.
  optional double lr_decay_a = 2;
  optional double lr_decay_b = 3;
}

// Serialized tensor: an element type tag plus its content as byte strings.
message TensorProto {
  // Element type of the tensor data.
  enum DataType {
    PADDLE_ELEMENT_TYPE_INT32 = 0;
    PADDLE_ELEMENT_TYPE_UINT32 = 1;
    PADDLE_ELEMENT_TYPE_INT64 = 2;
    PADDLE_ELEMENT_TYPE_UINT64 = 3;
    PADDLE_ELEMENT_TYPE_FLOAT32 = 4;
    PADDLE_ELEMENT_TYPE_FLOAT64 = 5;
  }
  // Element type of `content`.
  optional DataType data_type = 1;
  // Tensor payload. NOTE(review): presumably the raw element bytes, possibly
  // split into several chunks — confirm with the (de)serialization code.
  repeated bytes content = 2;
}

// Saved state of a learning rate policy.
message LrPolicyState {
  // Learning rate value; defaults to 1.0.
  optional double learning_rate = 1 [ default = 1.0 ];
  // Decay coefficients; same field names as in LinearLrConfig.
  optional double lr_decay_a = 2;
  optional double lr_decay_b = 3;
}

// Saved state of the SGD optimizer.
message SGDOptimizerState {
  // Learning rate policy state.
  optional LrPolicyState lr_state = 101;
  // Counter of samples passed, stored as a double.
  optional double num_sample_passed = 104;

  // Optimizer tensors.
  optional TensorProto parameter = 1;
  optional TensorProto momentums = 2;
}
// Saved state of the Adadelta optimizer.
message AdadeltaOptimizerState {
  // Learning rate policy state.
  optional LrPolicyState lr_state = 101;
  // Counter of samples passed, stored as a double.
  optional double num_sample_passed = 104;

  // Optimizer tensors.
  optional TensorProto parameter = 1;
  optional TensorProto accum_gradient = 2;
  optional TensorProto accum_delta = 3;
  optional TensorProto update_delta = 4;
}
// Saved state of the Adagrad optimizer.
message AdagradOptimizerState {
  // Learning rate policy state.
  optional LrPolicyState lr_state = 101;
  // Counter of samples passed, stored as a double.
  optional double num_sample_passed = 104;

  // Optimizer tensors.
  optional TensorProto parameter = 1;
  optional TensorProto accum_gradient = 2;
}
// Saved state of the Adam optimizer.
message AdamOptimizerState {
  // Learning rate policy state.
  optional LrPolicyState lr_state = 101;
  // Counter of samples passed, stored as a double.
  optional double num_sample_passed = 104;

  // Optimizer tensors.
  optional TensorProto parameter = 1;
  optional TensorProto momentums = 2;
  // Field name spelling (sic) is kept: renaming would change generated
  // accessors.
  optional TensorProto velocitys = 3;
}

// Top-level optimizer configuration: the optimizer choice with its
// per-algorithm settings, the learning rate policy with its settings, and
// gradient clipping thresholds.
message OptimizerConfig {
  // Available optimizer algorithms.
  enum Optimizer {
    SGD = 1;
    Adadelta = 2;
    Adagrad = 3;
    Adam = 4;
  }
  // Selected optimizer. NOTE(review): presumably only the matching config
  // field below is consumed — confirm with the code that reads this message.
  optional Optimizer optimizer = 1;
  optional SGDConfig sgd = 3;
  optional AdadeltaConfig adadelta = 4;
  optional AdagradConfig adagrad = 5;
  optional AdamConfig adam = 6;

  // Available learning rate policies.
  enum LrPolicy {
    Const = 0;
    Linear = 1;
  }
  // Selected learning rate policy and the per-policy settings.
  optional LrPolicy lr_policy = 11;
  optional ConstLrConfig const_lr = 12;
  optional LinearLrConfig linear_lr = 13;

  // Common config of the optimizer.
  // Clip the gradient when its L2 norm exceeds this value.
  optional double clip_norm = 101;
  // Clip the gradient when its L1 norm exceeds this value.
  optional double clip_value = 102;
}