Commit 2c5a6ac0 authored by Yu Yang, committed by qingqing01

Optional fields to shrink generated proto size (#93)

* remove unnecessary field set in ParameterConfig, Evaluators, etc
Parent 04876d03
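The whole commit follows one idea: in proto2, an optional field that is never set costs zero bytes on the wire, and readers see its [default = ...] value instead. So the config generator now copies a value into the message only when the caller actually supplied one. A minimal sketch of that guard pattern (set_if_given is an illustrative name, not something this commit adds):

    def set_if_given(message, field_name, value):
        """Set a scalar field on a generated protobuf message only when the
        caller passed a real value; leaving it unset keeps it off the wire."""
        if value is not None:
            setattr(message, field_name, value)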
@@ -31,7 +31,7 @@ bool CRFLayer::init(const LayerMap& layerMap,
   }
 
   // coeff only affect bp, keep consistent with CostLayer
-  coeff_ = config_.has_coeff() ? config_.coeff() : real(1.0);
+  coeff_ = config_.coeff();
   if (inputLayers_.size() == 3) {
     weightLayer_ = inputLayers_[2];
   }
......
@@ -26,11 +26,7 @@ namespace paddle {
 bool CostLayer::init(const LayerMap& layerMap,
                      const ParameterMap& parameterMap) {
   bool ret = Layer::init(layerMap, parameterMap);
-  if (config_.has_coeff()) {
-    coeff_ = config_.coeff(); // coeff only affact bp
-  } else {
-    coeff_ = real(1.0);
-  }
+  coeff_ = config_.coeff();
   if (!ret) return ret;
   CHECK_GE(inputLayers_.size(), 2UL);
   CHECK_LE(inputLayers_.size(), 3UL);
......
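Both C++ simplifications lean on the [default = 1.0] this commit adds to the coeff field in the model-config proto (see the proto hunk below): once the default is declared, the generated getter already returns 1.0 for an unset field, so the has_coeff() branch is dead weight. A sketch of the same behavior from Python, assuming a LayerConfig class compiled from that proto (module path assumed):

    from paddle.proto import ModelConfig_pb2  # assumed generated module path

    config = ModelConfig_pb2.LayerConfig()
    print(config.HasField('coeff'))  # False: nothing set, nothing serialized
    print(config.coeff)              # 1.0: the declared default, applied on read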
 dump_text.test
 test_pydata_provider_wrapper.json
+*proto.bin
@@ -299,7 +299,7 @@ sinclude(`ModelConfigLayer.proto.m4')
   optional bool norm_by_times = 25;
 
   // for CostLayers
-  optional real coeff = 26;
+  optional real coeff = 26 [default = 1.0];
 
   // for AverageLayer
   // can be set to: 'average', 'sum' or 'squarerootn'
......
@@ -31,8 +31,8 @@ message ParameterUpdaterHookConfig {
 message ParameterConfig {
   required string name = 1;
   required uint64 size = 2;
-  required real learning_rate = 3;
-  required real momentum = 4;
+  optional real learning_rate = 3 [default = 1.0];
+  optional real momentum = 4 [default = 0.0];
   optional real initial_mean = 5 [default = 0.0];
   optional real initial_std = 6 [default = 0.01];
   // use L2-regularization if decay_rate set and decay_rate_l1 not set
@@ -54,8 +54,8 @@ message ParameterConfig {
   optional int32 num_batches_regularization = 13 [default = 1];
   // if is_sparse is true, para is sparse, else para is dense
   optional bool is_sparse = 14 [default = false];
-  // if para is sparse, format should be "csc" or "csr"
-  optional string format = 15 [default = "csr"];
+  // if para is sparse, format should be "csc" or "csr", empty means is not sparse
+  optional string format = 15 [default = ""];
   // sparse remote update or not
   optional bool sparse_remote_update = 16 [default = false];
   // gradient clipping threshold, no clipping by default
......
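Relaxing learning_rate and momentum from required to optional is safe on the wire: proto2 encodes required and optional scalars identically, so configs serialized before this change still parse, and new configs that omit the fields fall back to the declared defaults. A sketch, assuming a ParameterConfig class generated from the message above (module path assumed):

    from paddle.proto import ParameterConfig_pb2  # assumed generated module path

    cfg = ParameterConfig_pb2.ParameterConfig(name="w", size=10)
    blob = cfg.SerializePartialToString()  # learning_rate absent from the bytes
    restored = ParameterConfig_pb2.ParameterConfig.FromString(blob)
    print(restored.learning_rate)  # 1.0, supplied by [default = 1.0] on read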
@@ -114,15 +114,15 @@ g_layer_type_map = {}
 # Initialize global variables. We use this function so that we can
 # call parse_config() multiple times
 def init_config_environment(
-        g_default_momentum = 0.,
-        g_default_decay_rate = 0.,
+        g_default_momentum = None,
+        g_default_decay_rate = None,
         g_default_initial_mean = 0.,
         g_default_initial_std = 0.01,
-        g_default_num_batches_regularization = 1,
+        g_default_num_batches_regularization = None,
         g_default_initial_strategy = 0,
         g_default_initial_smart = False,
-        g_default_gradient_clipping_threshold = 0.,
-        g_default_device = -1,
+        g_default_gradient_clipping_threshold = None,
+        g_default_device = None,
         g_default_update_hooks = None,
         g_default_compact_func = None,
@@ -1099,12 +1099,12 @@ def Evaluator(
         inputs,
         chunk_scheme = None,
         num_chunk_types = None,
-        classification_threshold = 0.5,
-        positive_label = -1,
-        dict_file = "",
-        result_file = "",
-        num_results = 1,
-        delimited = True,
+        classification_threshold = None,
+        positive_label = None,
+        dict_file = None,
+        result_file = None,
+        num_results = None,
+        delimited = None,
         ):
     evaluator = g_config.model_config.evaluators.add()
     evaluator.type = type
@@ -1120,12 +1120,19 @@ def Evaluator(
         evaluator.num_chunk_types = num_chunk_types
     g_current_submodel.evaluator_names.append(evaluator.name)
 
-    evaluator.classification_threshold = classification_threshold
-    evaluator.positive_label = positive_label
-    evaluator.dict_file = dict_file
-    evaluator.result_file = result_file
-    evaluator.num_results = num_results
-    evaluator.delimited = delimited
+    if classification_threshold is not None:
+        evaluator.classification_threshold = classification_threshold
+    if positive_label is not None:
+        evaluator.positive_label = positive_label
+    if dict_file is not None:
+        evaluator.dict_file = dict_file
+    if result_file is not None:
+        evaluator.result_file = result_file
+    if num_results is not None:
+        evaluator.num_results = num_results
+    if delimited is not None:
+        evaluator.delimited = delimited
 
 class LayerBase(object):
     def __init__(
@@ -1137,7 +1144,7 @@ class LayerBase(object):
             device=None,
             active_type="",
             drop_rate=0.,
-            coeff=1.):
+            coeff=None):
         config_assert('@' not in name,
                       "layer name: %s contain special character @" % name)
         global g_current_submodel
@@ -1155,10 +1162,12 @@ class LayerBase(object):
             self.inputs = [self.inputs]
 
         self.config = g_config.model_config.layers.add()
+        assert isinstance(self.config, LayerConfig)
         self.config.name = name
         self.config.type = type
         self.config.active_type = active_type
-        self.config.coeff = coeff
+        if coeff is not None:
+            self.config.coeff = float(coeff)
         if size != 0:
             self.config.size = size
         if drop_rate != 0:
@@ -1166,7 +1175,7 @@ class LayerBase(object):
         if device is not None:
             self.config.device = device
-        else:
+        elif g_default_device is not None:
             self.config.device = g_default_device
 
         for input_index in xrange(len(self.inputs)):
@@ -1236,10 +1245,12 @@ class LayerBase(object):
             if bias.parameter_name is None:
                 bias.parameter_name = gen_bias_parameter_name(self.config.name)
             if bias.parameter_name not in g_parameter_map:
+                assert isinstance(self.config, LayerConfig)
                 Parameter(
                     bias.parameter_name,
                     size,
-                    self.config.device,
+                    self.config.device if self.config.HasField('device') else None,
                     dims,
                     bias.learning_rate,
                     bias.momentum,
@@ -1265,7 +1276,7 @@ class LayerBase(object):
             input_index,
             size,
             dims=None,
-            sparse = False,
+            sparse = None,
             format = "csr"):
         if dims is None:
             # TODO(yuyang18): print warning and callstack here!
@@ -1293,7 +1304,7 @@ class LayerBase(object):
         Parameter(
             input_config.parameter_name,
             size,
-            self.config.device,
+            self.config.device if self.config.HasField("device") else None,
             dims,
             input_config.learning_rate,
             input_config.momentum,
@@ -1353,6 +1364,8 @@ class FCLayer(LayerBase):
             if sparse:
                 psize = self.inputs[input_index].nnz
+            else:
+                sparse = None
 
             self.create_input_parameter(input_index, psize, dims, sparse, format)
         self.create_bias_parameter(bias, self.config.size)
@@ -2836,27 +2849,44 @@ def Parameter(
     para = g_config.model_config.parameters.add()
     para.name = name
     para.size = size
-    para.device = device
-    para.dims.extend(dims);
-    para.learning_rate = default(learning_rate, 1.)
-    para.momentum = default(momentum, g_default_momentum)
+    if device is not None:
+        para.device = int(device)
+    para.dims.extend(dims)
+
+    if learning_rate is not None:
+        para.learning_rate = float(learning_rate)
+
+    momentum = default(momentum, g_default_momentum)
+    if momentum is not None:
+        para.momentum = float(momentum)
     config_assert(not momentum or not decay_rate_l1,
                   "momentum and decay_rate_l1 cannot both be non-zero")
-    para.decay_rate = default(decay_rate, g_default_decay_rate)
+
+    decay_rate = default(decay_rate, g_default_decay_rate)
+    if decay_rate is not None:
+        para.decay_rate = decay_rate
+
     if decay_rate_l1 is not None:
         para.decay_rate_l1 = decay_rate_l1
     para.initial_std = default(initial_std, g_default_initial_std)
     para.initial_mean = default(initial_mean, g_default_initial_mean)
-    para.num_batches_regularization = default(
+
+    num_batches_regularization = default(
         num_batches_regularization, g_default_num_batches_regularization)
+    if num_batches_regularization is not None:
+        para.num_batches_regularization = int(num_batches_regularization)
+
     if sparse_remote_update is not None:
         para.sparse_remote_update = sparse_remote_update
         if sparse_remote_update:
             g_config.opt_config.use_sparse_remote_updater = True
     if sparse_update is not None:
         para.sparse_update = sparse_update
-    para.gradient_clipping_threshold = default(
-        gradient_clipping_threshold, g_default_gradient_clipping_threshold);
+    gradient_clipping_threshold = default(
+        gradient_clipping_threshold, g_default_gradient_clipping_threshold)
+    if gradient_clipping_threshold is not None:
+        para.gradient_clipping_threshold = gradient_clipping_threshold
     para.initial_strategy = default(initial_strategy, g_default_initial_strategy)
     para.initial_smart = default(initial_smart, g_default_initial_smart)
     if para.initial_smart:
@@ -2869,15 +2899,19 @@ def Parameter(
             para.initial_std = 1. / math.sqrt(para.size)
     if g_default_compact_func is not None:
         sparse, format, need_compact = g_default_compact_func(para.name)
-    para.is_sparse = default(sparse, False)
-    para.format = default(format, "")
-    para.need_compact = default(need_compact, False)
+
+    if sparse is not None:
+        para.is_sparse = sparse
+    if format is not None:
+        para.format = format
+    if need_compact is not None:
+        para.need_compact = need_compact
     if is_static is not None:
         para.is_static = is_static
     config_assert(not para.sparse_remote_update or not para.is_static,
                   "sparse_remote_update and is_static cannot both be true")
-    para.is_shared = default(is_shared, False)
+    if is_shared is not None:
+        para.is_shared = is_shared
 
     update_hooks = default(update_hooks, g_default_update_hooks)
......
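Several of the branches above funnel through the config parser's default() helper. As this diff uses it, the helper falls back only when the caller passed None, so "no value given" and "explicit value" stay distinguishable all the way to the proto message. Its behavior as implied by this diff:

    def default(x, default_value):
        # fall back only on None, never on 0, 0.0, "" or False
        return default_value if x is None else x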
@@ -65,12 +65,12 @@ def evaluator_base(
         name=None,
         chunk_scheme=None,
         num_chunk_types=None,
-        classification_threshold=0.5,
-        positive_label=-1,
-        dict_file="",
-        result_file="",
-        num_results=1,
-        delimited=True):
+        classification_threshold=None,
+        positive_label=None,
+        dict_file=None,
+        result_file=None,
+        num_results=None,
+        delimited=None):
     """
     Evaluator will evaluate the network status while training/testing.
@@ -105,9 +105,10 @@ def evaluator_base(
     :type weight: LayerOutput.
     """
     # inputs type assertions.
-    assert isinstance(classification_threshold, float)
-    assert isinstance(positive_label, int)
-    assert isinstance(num_results, int)
+    assert classification_threshold is None or isinstance(
+        classification_threshold, float)
+    assert positive_label is None or isinstance(positive_label, int)
+    assert num_results is None or isinstance(num_results, int)
     if not isinstance(input, list):
         input = [input]
@@ -136,7 +137,7 @@ def classification_error_evaluator(
         label,
         name=None,
         weight=None,
-        threshold=0.5):
+        threshold=None):
     """
     Classification Error Evaluator. It will print error rate for classification.
@@ -253,7 +254,7 @@ def pnpair_evaluator(
 def precision_recall_evaluator(
         input,
         label,
-        positive_label=-1,
+        positive_label=None,
         weight=None,
         name=None,
         ):
@@ -494,7 +495,7 @@ def gradient_printer_evaluator(
 @wrap_name_default()
 def maxid_printer_evaluator(
         input,
-        num_results=1,
+        num_results=None,
         name=None,
         ):
     """
@@ -518,13 +519,14 @@ def maxid_printer_evaluator(
     """
     evaluator_base(name=name,
                    type="max_id_printer",
-                   input=input)
+                   input=input,
+                   num_results=num_results)
 
 @evaluator(EvaluatorAttribute.FOR_PRINT)
 @wrap_name_default()
 def maxframe_printer_evaluator(
         input,
-        num_results=1,
+        num_results=None,
         name=None,
         ):
     """
@@ -556,9 +558,9 @@ def maxframe_printer_evaluator(
 @wrap_name_default()
 def seqtext_printer_evaluator(
         input,
-        dict_file="",
-        result_file="",
-        delimited=True,
+        result_file,
+        dict_file=None,
+        delimited=None,
         name=None,
         ):
     """
@@ -616,6 +618,7 @@ def seqtext_printer_evaluator(
     :param name: Evaluator name.
     :type name: None|basestring
     """
+    assert isinstance(result_file, basestring)
     evaluator_base(name=name,
                    type="seq_text_printer",
                    input=input,
......
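One caller-visible change here: seqtext_printer_evaluator now takes result_file as a required argument instead of defaulting to "". A call now looks something like this (the input layer name is illustrative):

    seqtext_printer_evaluator(input=maxid_layer,
                              result_file="gen_sequences.txt")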
@@ -79,7 +79,7 @@ class MomentumOptimizer(BaseSGDOptimizer):
         'learning_method': 'momentum'
     }
 
-    def __init__(self, momentum=1e-3):
+    def __init__(self, momentum=None):
         self.momentum = momentum
......
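With momentum=None, MomentumOptimizer stops forcing 1e-3 into every generated config; an unset momentum falls back to the [default = 0.0] declared in ParameterConfig above. Both styles remain valid (a usage sketch):

    opt = MomentumOptimizer()              # momentum left unset in the proto
    opt = MomentumOptimizer(momentum=0.9)  # explicit value, serialized as before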