Commit 2c5a6ac0 authored by Yu Yang, committed by qingqing01

Optional fields to shrink generated proto size (#93)

* remove unnecessary field sets in ParameterConfig, Evaluators, etc.
Parent 04876d03
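
The change relies on proto2 field-presence semantics: an optional field with a declared default reads back as that default when unset, yet contributes no bytes to the serialized message. A minimal sketch of the effect, assuming the generated ParameterConfig message is importable (the module path below is hypothetical):

    from paddle.proto.ParameterConfig_pb2 import ParameterConfig

    # Only the two required fields are set; everything else stays unset.
    para = ParameterConfig(name="fc.w0", size=1024)

    # learning_rate is now `optional real learning_rate = 3 [default = 1.0]`,
    # so leaving it unset still reads back as 1.0 ...
    assert not para.HasField("learning_rate")
    assert para.learning_rate == 1.0

    # ... and unset fields are omitted from the wire format entirely,
    # which is what shrinks the generated config.
    print(len(para.SerializeToString()))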
@@ -31,7 +31,7 @@ bool CRFLayer::init(const LayerMap& layerMap,
}
// coeff only affects bp, keep consistent with CostLayer
coeff_ = config_.has_coeff() ? config_.coeff() : real(1.0);
coeff_ = config_.coeff();
if (inputLayers_.size() == 3) {
weightLayer_ = inputLayers_[2];
}
......
@@ -26,11 +26,7 @@ namespace paddle {
bool CostLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
bool ret = Layer::init(layerMap, parameterMap);
if (config_.has_coeff()) {
coeff_ = config_.coeff(); // coeff only affects bp
} else {
coeff_ = real(1.0);
}
coeff_ = config_.coeff();
if (!ret) return ret;
CHECK_GE(inputLayers_.size(), 2UL);
CHECK_LE(inputLayers_.size(), 3UL);
......
dump_text.test
test_pydata_provider_wrapper.json
*proto.bin
@@ -299,7 +299,7 @@ sinclude(`ModelConfigLayer.proto.m4')
optional bool norm_by_times = 25;
// for CostLayers
optional real coeff = 26;
optional real coeff = 26 [default = 1.0];
// for AverageLayer
// can be set to: 'average', 'sum' or 'squarerootn'
......
@@ -31,8 +31,8 @@ message ParameterUpdaterHookConfig {
message ParameterConfig {
required string name = 1;
required uint64 size = 2;
required real learning_rate = 3;
required real momentum = 4;
optional real learning_rate = 3 [default = 1.0];
optional real momentum = 4 [default = 0.0];
optional real initial_mean = 5 [default = 0.0];
optional real initial_std = 6 [default = 0.01];
// use L2-regularization if decay_rate set and decay_rate_l1 not set
@@ -54,8 +54,8 @@ message ParameterConfig {
optional int32 num_batches_regularization = 13 [default = 1];
// if is_sparse is true, para is sparse, else para is dense
optional bool is_sparse = 14[default = false];
// if para is sparse, format should be "csc" or "csr"
optional string format = 15[default = "csr"];
// if para is sparse, format should be "csc" or "csr", empty means is not sparse
optional string format = 15 [default = ""];
// sparse remote update or not
optional bool sparse_remote_update = 16 [default = false];
// gradient clipping threshold, no clipping by default
......
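
One subtle consequence of the new `format` default: the empty string now doubles as the dense marker, so "csr" no longer appears in configs that never asked for it. A sketch of how a consumer might read the field (the helper name is hypothetical):

    def parameter_is_sparse(para):
        # With [default = ""], an unset format implies a dense parameter;
        # "csc"/"csr" must now be set explicitly.
        return para.is_sparse or bool(para.format)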
@@ -114,15 +114,15 @@ g_layer_type_map = {}
# Initialize global variables. We use this function so that we can
# call parse_config() multiple times
def init_config_environment(
g_default_momentum = 0.,
g_default_decay_rate = 0.,
g_default_momentum = None,
g_default_decay_rate = None,
g_default_initial_mean = 0.,
g_default_initial_std = 0.01,
g_default_num_batches_regularization = 1,
g_default_num_batches_regularization = None,
g_default_initial_strategy = 0,
g_default_initial_smart = False,
g_default_gradient_clipping_threshold = 0.,
g_default_device = -1,
g_default_gradient_clipping_threshold = None,
g_default_device = None,
g_default_update_hooks = None,
g_default_compact_func = None,
@@ -1099,12 +1099,12 @@ def Evaluator(
inputs,
chunk_scheme = None,
num_chunk_types = None,
classification_threshold = 0.5,
positive_label = -1,
dict_file = "",
result_file = "",
num_results = 1,
delimited = True,
classification_threshold = None,
positive_label = None,
dict_file = None,
result_file = None,
num_results = None,
delimited = None,
):
evaluator = g_config.model_config.evaluators.add()
evaluator.type = type
@@ -1120,11 +1120,18 @@ def Evaluator(
evaluator.num_chunk_types = num_chunk_types
g_current_submodel.evaluator_names.append(evaluator.name)
if classification_threshold is not None:
evaluator.classification_threshold = classification_threshold
if positive_label is not None:
evaluator.positive_label = positive_label
if dict_file is not None:
evaluator.dict_file = dict_file
if result_file is not None:
evaluator.result_file = result_file
if num_results is not None:
evaluator.num_results = num_results
if delimited is not None:
evaluator.delimited = delimited
class LayerBase(object):
@@ -1137,7 +1144,7 @@ class LayerBase(object):
device=None,
active_type="",
drop_rate=0.,
coeff=1.):
coeff=None):
config_assert('@' not in name,
"layer name: %s contain special character @" % name)
global g_current_submodel
@@ -1155,10 +1162,12 @@ class LayerBase(object):
self.inputs = [self.inputs]
self.config = g_config.model_config.layers.add()
assert isinstance(self.config, LayerConfig)
self.config.name = name
self.config.type = type
self.config.active_type = active_type
self.config.coeff = coeff
if coeff is not None:
self.config.coeff = float(coeff)
if size != 0:
self.config.size = size
if drop_rate != 0:
@@ -1166,7 +1175,7 @@ class LayerBase(object):
if device is not None:
self.config.device = device
else:
elif g_default_device is not None:
self.config.device = g_default_device
for input_index in xrange(len(self.inputs)):
@@ -1236,10 +1245,12 @@ class LayerBase(object):
if bias.parameter_name is None:
bias.parameter_name = gen_bias_parameter_name(self.config.name)
if bias.parameter_name not in g_parameter_map:
assert isinstance(self.config, LayerConfig)
Parameter(
bias.parameter_name,
size,
self.config.device,
self.config.device if self.config.HasField('device') else None,
dims,
bias.learning_rate,
bias.momentum,
@@ -1265,7 +1276,7 @@ class LayerBase(object):
input_index,
size,
dims=None,
sparse = False,
sparse = None,
format = "csr"):
if dims is None:
# TODO(yuyang18): print warning and callstack here!
@@ -1293,7 +1304,7 @@ class LayerBase(object):
Parameter(
input_config.parameter_name,
size,
self.config.device,
self.config.device if self.config.HasField("device") else None,
dims,
input_config.learning_rate,
input_config.momentum,
@@ -1353,6 +1364,8 @@ class FCLayer(LayerBase):
if sparse:
psize = self.inputs[input_index].nnz
else:
sparse = None
self.create_input_parameter(input_index, psize, dims, sparse, format)
self.create_bias_parameter(bias, self.config.size)
@@ -2836,27 +2849,44 @@ def Parameter(
para = g_config.model_config.parameters.add()
para.name = name
para.size = size
para.device = device
para.dims.extend(dims);
para.learning_rate = default(learning_rate, 1.)
para.momentum = default(momentum, g_default_momentum)
if device is not None:
para.device = int(device)
para.dims.extend(dims)
if learning_rate is not None:
para.learning_rate = float(learning_rate)
momentum = default(momentum, g_default_momentum)
if momentum is not None:
para.momentum = float(momentum)
config_assert(not momentum or not decay_rate_l1,
"momentum and decay_rate_l1 cannot both be non-zero")
para.decay_rate = default(decay_rate, g_default_decay_rate)
decay_rate = default(decay_rate, g_default_decay_rate)
if decay_rate is not None:
para.decay_rate = decay_rate
if decay_rate_l1 is not None:
para.decay_rate_l1 = decay_rate_l1
para.initial_std = default(initial_std, g_default_initial_std)
para.initial_mean = default(initial_mean, g_default_initial_mean)
para.num_batches_regularization = default(
num_batches_regularization = default(
num_batches_regularization, g_default_num_batches_regularization)
if num_batches_regularization is not None:
para.num_batches_regularization = int(num_batches_regularization)
if sparse_remote_update is not None:
para.sparse_remote_update = sparse_remote_update
if sparse_remote_update:
g_config.opt_config.use_sparse_remote_updater = True
if sparse_update is not None:
para.sparse_update = sparse_update
para.gradient_clipping_threshold = default(
gradient_clipping_threshold, g_default_gradient_clipping_threshold);
gradient_clipping_threshold = default(
gradient_clipping_threshold, g_default_gradient_clipping_threshold)
if gradient_clipping_threshold is not None:
para.gradient_clipping_threshold = gradient_clipping_threshold
para.initial_strategy = default(initial_strategy, g_default_initial_strategy)
para.initial_smart = default(initial_smart, g_default_initial_smart)
if para.initial_smart:
@@ -2869,15 +2899,19 @@ def Parameter(
para.initial_std = 1. / math.sqrt(para.size)
if g_default_compact_func is not None:
sparse, format, need_compact = g_default_compact_func(para.name)
para.is_sparse = default(sparse, False)
para.format = default(format, "")
para.need_compact = default(need_compact, False)
if sparse is not None:
para.is_sparse = sparse
if format is not None:
para.format = format
if need_compact is not None:
para.need_compact = need_compact
if is_static is not None:
para.is_static = is_static
config_assert(not para.sparse_remote_update or not para.is_static,
"sparse_remote_update and is_static cannot both be true")
para.is_shared = default(is_shared, False)
if is_shared is not None:
para.is_shared = is_shared
update_hooks = default(update_hooks, g_default_update_hooks)
......
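
The `Parameter` and `Evaluator` changes above repeat one idiom: copy a value into the proto only when the caller (or a global default) actually supplied one, leaving the field unset otherwise. A condensed sketch of that idiom; the helper name is invented for illustration:

    def set_if_given(message, field, value, cast=None):
        # Leave the proto field unset when no value was supplied, so the
        # declared [default = ...] applies and nothing is serialized.
        if value is not None:
            setattr(message, field, cast(value) if cast else value)

With it, a block like `if momentum is not None: para.momentum = float(momentum)` collapses to `set_if_given(para, 'momentum', momentum, float)`, and the `None` defaults of the reworked globals pass through harmlessly.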
@@ -65,12 +65,12 @@ def evaluator_base(
name=None,
chunk_scheme=None,
num_chunk_types=None,
classification_threshold=0.5,
positive_label=-1,
dict_file="",
result_file="",
num_results=1,
delimited=True):
classification_threshold=None,
positive_label=None,
dict_file=None,
result_file=None,
num_results=None,
delimited=None):
"""
Evaluator will evaluate the network status while training/testing.
@@ -105,9 +105,10 @@ def evaluator_base(
:type weight: LayerOutput.
"""
# inputs type assertions.
assert isinstance(classification_threshold, float)
assert isinstance(positive_label, int)
assert isinstance(num_results, int)
assert classification_threshold is None or isinstance(
classification_threshold, float)
assert positive_label is None or isinstance(positive_label, int)
assert num_results is None or isinstance(num_results, int)
if not isinstance(input, list):
input = [input]
@@ -136,7 +137,7 @@ def classification_error_evaluator(
label,
name=None,
weight=None,
threshold=0.5):
threshold=None):
"""
Classification Error Evaluator. It will print error rate for classification.
@@ -253,7 +254,7 @@ def pnpair_evaluator(
def precision_recall_evaluator(
input,
label,
positive_label=-1,
positive_label=None,
weight=None,
name=None,
):
@@ -494,7 +495,7 @@ def gradient_printer_evaluator(
@wrap_name_default()
def maxid_printer_evaluator(
input,
num_results=1,
num_results=None,
name=None,
):
"""
@@ -518,13 +519,14 @@ def maxid_printer_evaluator(
"""
evaluator_base(name=name,
type="max_id_printer",
input=input)
input=input,
num_results=num_results)
@evaluator(EvaluatorAttribute.FOR_PRINT)
@wrap_name_default()
def maxframe_printer_evaluator(
input,
num_results=1,
num_results=None,
name=None,
):
"""
@@ -556,9 +558,9 @@ def maxframe_printer_evaluator(
@wrap_name_default()
def seqtext_printer_evaluator(
input,
dict_file="",
result_file="",
delimited=True,
result_file,
dict_file=None,
delimited=None,
name=None,
):
"""
@@ -616,6 +618,7 @@ def seqtext_printer_evaluator(
:param name: Evaluator name.
:type name: None|basestring
"""
assert isinstance(result_file, basestring)
evaluator_base(name=name,
type="seq_text_printer",
input=input,
......
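
The evaluator helpers above trade concrete defaults (0.5, -1, "", True) for `None`, so an unspecified argument never reaches the proto, and the type assertions are loosened to match. A minimal, self-contained sketch of that validation idiom (names are hypothetical):

    def check_optional(value, expected_type, name):
        # Accept "not provided" (None) or a value of the expected type;
        # reject anything else before it is written into the proto.
        assert value is None or isinstance(value, expected_type), (
            "%s must be None or %s" % (name, expected_type.__name__))

    check_optional(None, float, "classification_threshold")  # ok: stays unset
    check_optional(0.5, float, "classification_threshold")   # ok: explicit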
@@ -79,7 +79,7 @@ class MomentumOptimizer(BaseSGDOptimizer):
'learning_method': 'momentum'
}
def __init__(self, momentum=1e-3):
def __init__(self, momentum=None):
self.momentum = momentum
......