Commit eb3bf9ea authored by Tao Luo, committed by GitHub

Merge pull request #360 from emailweixu/sum_cost

Add SumCost
@@ -465,6 +465,11 @@ SumOfSquaresCostLayer
.. doxygenclass:: paddle::SumOfSquaresCostLayer
:members:
SumCostLayer
`````````````````````
.. doxygenclass:: paddle::SumCostLayer
:members:
CosSimLayer
-----------
.. doxygenclass:: paddle::CosSimLayer
......
@@ -407,6 +407,12 @@ hsigmoid
:members: hsigmoid
:noindex:
sum_cost
---------
.. automodule:: paddle.trainer_config_helpers.layers
:members: sum_cost
:noindex:
Check Layer
============
......
@@ -562,4 +562,39 @@ void HuberTwoClass::backwardImpIn(
}
}
/**
* This cost layer compute the sum of its input as loss.
* \f[
* o(i) = \sum_{j=1}^D y_{ij}
* \f]
*/
class SumCostLayer : public Layer {
public:
explicit SumCostLayer(const LayerConfig& config) : Layer(config) {}
bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) {
bool ret = Layer::init(layerMap, parameterMap);
if (!ret) return ret;
CHECK_EQ(inputLayers_.size(), 1UL);
return true;
}
virtual void forward(PassType passType) {
Layer::forward(passType);
const MatrixPtr& input = getInputValue(0);
/* malloc memory for the output_ if necessary */
int batchSize = input->getHeight();
int size = 1;
resizeOutput(batchSize, size);
output_.value->sumRows(*input);
}
virtual void backward(const UpdateCallback& callback = nullptr) {
getInputGrad(0)->add((real)1);
}
};
REGISTER_LAYER(sum_cost, SumCostLayer);
}  // namespace paddle
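For reference, a minimal NumPy sketch (not part of the commit; the helper names are made up) of what the new SumCostLayer computes: forward sums each input row into one value per sample, and backward adds 1 to every element of the input gradient, since d(sum)/d(x_j) = 1 and a top-level cost layer receives an implicit output gradient of 1.

# Illustrative only -- not part of the patch.
import numpy as np

def sum_cost_forward(x):
    # x has shape (batch_size, dim); the output has shape (batch_size, 1),
    # mirroring resizeOutput(batchSize, 1) and output_.value->sumRows(*input).
    return x.sum(axis=1, keepdims=True)

def sum_cost_backward(x):
    # The gradient of each per-sample sum w.r.t. every input element is 1,
    # mirroring getInputGrad(0)->add((real)1) in SumCostLayer::backward().
    return np.ones_like(x)

x = np.array([[1.0, 2.0, 3.0],
              [4.0, 5.0, 6.0]])
print(sum_cost_forward(x))   # [[ 6.] [15.]]
print(sum_cost_backward(x))  # array of ones with the same shape as x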
@@ -129,7 +129,7 @@ protected:
* This cost layer compute Euclidean (L2) loss for real-valued regression
* tasks.
* \f[
* L = \frac{1}{2N} \sum_{i=1}^N {|| \hat{y}_i - y_i||_2^2}
* L = \sum_{i=1}^N {|| \hat{y}_i - y_i||_2^2}
* \f]
*/
class SumOfSquaresCostLayer : public CostLayer {
......
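The hunk above also rewrites the SumOfSquaresCostLayer doc comment to the plain sum-of-squares form L = \sum_{i=1}^N ||\hat{y}_i - y_i||_2^2, dropping the 1/(2N) factor. A tiny NumPy check of that formula, for illustration only:

# Illustrative only -- not part of the patch.
import numpy as np

y_hat = np.array([[0.5, 1.0],
                  [2.0, 0.0]])
y     = np.array([[0.0, 1.0],
                  [1.0, 1.0]])
L = np.sum((y_hat - y) ** 2)  # 0.25 + 0.0 + 1.0 + 1.0
print(L)                      # 2.25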
@@ -998,6 +998,19 @@ TEST(Layer, rankCostLayer) {
}
}
TEST(Layer, sumCostLayer) {
TestConfig config;
config.layerConfig.set_type("sum_cost");
config.biasSize = 0;
config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0});
config.layerConfig.add_inputs();
for (auto useGpu : {false, true}) {
testLayerGrad(config, "sum_cost", 100, false, useGpu);
}
}
TEST(Layer, weightedRankCostLayer) {
TestConfig config;
config.layerConfig.set_type("rank-cost");
......
@@ -1903,6 +1903,7 @@ define_cost('SumOfSquaresCostLayer', 'square_error')
define_cost('MultiBinaryLabelCrossEntropy', 'multi_binary_label_cross_entropy')
define_cost('SoftBinaryClassCrossEntropy', 'soft_binary_class_cross_entropy')
define_cost('HuberTwoClass', 'huber')
define_cost('SumCost', 'sum_cost')
@config_layer('hsigmoid')
class HierarchicalSigmoidLayer(LayerBase):
......
@@ -53,7 +53,7 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
'convex_comb_layer', 'ctc_layer', 'crf_layer', 'crf_decoding_layer',
'nce_layer',
'cross_entropy_with_selfnorm', 'cross_entropy',
'multi_binary_label_cross_entropy',
'multi_binary_label_cross_entropy', 'sum_cost',
'rank_cost', 'lambda_cost', 'huber_cost',
'block_expand_layer',
'maxout_layer', 'out_prod_layer', 'print_layer'
@@ -130,6 +130,7 @@ class LayerType(object):
CROSS_ENTROPY_WITH_SELFNORM = "multi_class_cross_entropy_with_selfnorm"
SOFT_BIN_CLASS_CROSS_ENTROPY = "soft_binary_class_cross_entropy"
MULTI_BIN_LABEL_CROSS_ENTROPY = "multi_binary_label_cross_entropy"
SUM_COST = "sum_cost"
@staticmethod
def is_layer_type(type_name):
@@ -4053,8 +4054,6 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
:type input: LayerOutput.
:param label: The input label.
:type input: LayerOutput.
:param type: The type of cost.
:type type: basestring.
:param name: The name of this layers. It is not necessary.
:type name: None|basestring.
:param coeff: The coefficient affects the gradient in the backward.
@@ -4091,8 +4090,6 @@ def cross_entropy_with_selfnorm(input, label, name=None, coeff=1.0,
:type input: LayerOutput.
:param label: The input label.
:type input: LayerOutput.
:param type: The type of cost.
:type type: basestring.
:param name: The name of this layers. It is not necessary.
:type name: None|basestring.
:param coeff: The coefficient affects the gradient in the backward.
@@ -4117,6 +4114,36 @@ def cross_entropy_with_selfnorm(input, label, name=None, coeff=1.0,
parents=[input, label], size=1)
@wrap_name_default()
@layer_support()
def sum_cost(input, name=None, layer_attr=None):
"""
A loss layer which calculate the sum of the input as loss
.. code-block:: python
cost = sum_cost(input)
:param input: The first input layer.
:type input: LayerOutput.
:param name: The name of this layers. It is not necessary.
:type name: None|basestring.
:param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput.
"""
Layer(name=name,
type=LayerType.SUM_COST,
inputs=[input.name],
**ExtraLayerAttribute.to_kwargs(layer_attr)
)
return LayerOutput(name,
LayerType.SUM_COST,
parents=[input])
@wrap_name_default()
@layer_support()
def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None):
......
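A hedged usage sketch of the new sum_cost helper in a trainer config; the layer names and sizes below are made up for illustration and are not taken from the commit.

# Illustrative config snippet only -- not part of the patch.
from paddle.trainer_config_helpers import *

data = data_layer(name='input', size=100)
hidden = fc_layer(input=data, size=10)
# The per-sample loss is simply the sum of hidden's activations.
cost = sum_cost(input=hidden)
outputs(cost)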
@@ -23,6 +23,17 @@ layers {
size: 10
active_type: ""
}
layers {
name: "__fc_layer_0__"
type: "fc"
size: 4
active_type: "tanh"
inputs {
input_layer_name: "input"
input_parameter_name: "___fc_layer_0__.w0"
}
bias_parameter_name: "___fc_layer_0__.wbias"
}
layers {
name: "__ctc_layer_0__"
type: "ctc"
@@ -36,17 +47,6 @@ layers {
}
norm_by_times: false
}
layers {
name: "__fc_layer_0__"
type: "fc"
size: 4
active_type: "tanh"
inputs {
input_layer_name: "input"
input_parameter_name: "___fc_layer_0__.w0"
}
bias_parameter_name: "___fc_layer_0__.wbias"
}
layers {
name: "crf_label"
type: "data"
@@ -191,6 +191,16 @@ layers {
}
coeff: 1.0
}
layers {
name: "__sum_cost_0__"
type: "sum_cost"
size: 1
active_type: ""
inputs {
input_layer_name: "__fc_layer_0__"
}
coeff: 1.0
}
parameters {
name: "___fc_layer_0__.w0"
size: 800
@@ -241,14 +251,15 @@ output_layer_names: "__cross_entropy_0__"
output_layer_names: "__cross_entropy_with_selfnorm_0__"
output_layer_names: "__huber_cost_0__"
output_layer_names: "__multi_binary_label_cross_entropy_0__"
output_layer_names: "__sum_cost_0__"
sub_models {
name: "root"
layer_names: "input"
layer_names: "labels"
layer_names: "probs"
layer_names: "xe-label"
layer_names: "__ctc_layer_0__"
layer_names: "__fc_layer_0__" layer_names: "__fc_layer_0__"
layer_names: "__ctc_layer_0__"
layer_names: "crf_label" layer_names: "crf_label"
layer_names: "__crf_layer_0__" layer_names: "__crf_layer_0__"
layer_names: "left" layer_names: "left"
...@@ -264,6 +275,7 @@ sub_models { ...@@ -264,6 +275,7 @@ sub_models {
layer_names: "huber_label" layer_names: "huber_label"
layer_names: "__huber_cost_0__" layer_names: "__huber_cost_0__"
layer_names: "__multi_binary_label_cross_entropy_0__" layer_names: "__multi_binary_label_cross_entropy_0__"
layer_names: "__sum_cost_0__"
input_layer_names: "input" input_layer_names: "input"
input_layer_names: "labels" input_layer_names: "labels"
input_layer_names: "crf_label" input_layer_names: "crf_label"
...@@ -284,6 +296,7 @@ sub_models { ...@@ -284,6 +296,7 @@ sub_models {
output_layer_names: "__cross_entropy_with_selfnorm_0__" output_layer_names: "__cross_entropy_with_selfnorm_0__"
output_layer_names: "__huber_cost_0__" output_layer_names: "__huber_cost_0__"
output_layer_names: "__multi_binary_label_cross_entropy_0__" output_layer_names: "__multi_binary_label_cross_entropy_0__"
output_layer_names: "__sum_cost_0__"
is_recurrent_layer_group: false
}
@@ -11,8 +11,9 @@ labels = data_layer(name='labels', size=5000)
probs = data_layer(name='probs', size=10)
xe_label = data_layer(name='xe-label', size=10)
hidden = fc_layer(input=seq_in, size=4)
outputs(ctc_layer(input=seq_in, label=labels),
crf_layer(input=fc_layer(input=seq_in, size=4),
crf_layer(input=hidden,
label=data_layer(name='crf_label', size=4)),
rank_cost(left=data_layer(name='left', size=1),
right=data_layer(name='right', size=1),
@@ -23,4 +24,5 @@ outputs(ctc_layer(input=seq_in, label=labels),
cross_entropy_with_selfnorm(input=probs, label=xe_label),
huber_cost(input=data_layer(name='huber_probs', size=1),
label=data_layer(name='huber_label', size=1)),
multi_binary_label_cross_entropy(input=probs, label=xe_label))
multi_binary_label_cross_entropy(input=probs, label=xe_label),
sum_cost(hidden))