Commit 487dc670 authored by qingqing01, committed by GitHub

Merge pull request #72 from emailweixu/cos_sim_and_linear_comb

Change cos_sim to use CosSimLayer when size=1, and rename convex_comb_layer to linear_comb_layer
@@ -245,10 +245,10 @@ addto_layer
     :members: addto_layer
     :noindex:

-convex_comb_layer
+linear_comb_layer
 -----------------
 .. automodule:: paddle.trainer_config_helpers.layers
-    :members: convex_comb_layer
+    :members: linear_comb_layer
     :noindex:

 interpolation_layer
@@ -281,6 +281,12 @@ tensor_layer
     :members: tensor_layer
     :noindex:

+cos_sim
+-------
+.. automodule:: paddle.trainer_config_helpers.layers
+    :members: cos_sim
+    :noindex:
+
 trans_layer
 ------------
 .. automodule:: paddle.trainer_config_helpers.layers
@@ -341,12 +347,6 @@ rank_cost
     :members: rank_cost
     :noindex:

-cos_sim
--------
-.. automodule:: paddle.trainer_config_helpers.layers
-    :members: cos_sim
-    :noindex:
-
 crf_layer
 -----------------
 .. automodule:: paddle.trainer_config_helpers.layers
......
@@ -21,18 +21,20 @@ limitations under the License. */
 namespace paddle {

 /**
- * @brief A layer for a convex weighted average of vectors,
+ * @brief A layer for a weighted sum of vectors,
  * which is used in NEURAL MACHINE TRANSLATION BY JOINTLY LEARNING TO ALIGN AND
  * TRANSLATE
- * - Input: the first input contains the convex weights (batchSize x weightDim),
- *   and the shape of the second input is (batchSize x (weightDim * dataDim)).
- * - Output: the shape of the output is (batchSize x dataDim).
+ * - Input: the size of the first input is weightDim,
+ *   and the size of the second input is weightDim * dataDim.
+ * - Output: the size of the output is dataDim.
  * \f[
- *   out[i][j] = \sum_{j}(in0(i, j) * in1(i, j + i * dataDim)),
- *   i = 0,1,...,(batchSize-1); j = 0,1,...,(dataDim-1)
+ *   out(j) = \sum_{i}(in0(i) * in1(i, j + i * dataDim)),
+ *   i = 0,1,...,(weightDim-1); j = 0,1,...,(dataDim-1)
  * \f]
+ * Note that the above computation is for one sample. Multiple samples are
+ * processed in one batch.
  *
- * The config file api is convex_comb_layer.
+ * The config file api is linear_comb_layer.
  */
 class ConvexCombinationLayer : public Layer {
 protected:
......
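As a quick cross-check of the formula in the comment above, here is a minimal NumPy sketch of the per-sample computation (the function and variable names are illustrative, not part of the Paddle API):

import numpy as np

def linear_comb_ref(weights, vectors, data_dim):
    # weights: shape (weight_dim,)
    # vectors: shape (weight_dim * data_dim,), laid out as weight_dim
    #          consecutive chunks of length data_dim
    # returns: out with out[j] = sum_i weights[i] * vectors[i * data_dim + j]
    Y = vectors.reshape(len(weights), data_dim)
    return weights @ Y

w = np.array([0.2, 0.8])
v = np.arange(6.0)                  # two chunks of length 3
print(linear_comb_ref(w, v, 3))     # [2.4 3.4 4.2]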
@@ -48,7 +48,7 @@ void CosSimLayer::forward(PassType passType) {
     REGISTER_TIMER_INFO("CosFwAtvTimer", getName().c_str());
     MatrixPtr prevOut1 = getInputValue(0);
     MatrixPtr prevOut2 = getInputValue(1);
-    outV->cosSim(*prevOut1, *prevOut2, kCosSimScale_);
+    outV->cosSim(*prevOut1, *prevOut2, config_.cos_scale());
   }
 }
@@ -59,7 +59,7 @@ void CosSimLayer::backward(const UpdateCallback& callback) {
     outG->cosSimDerivative(*this->getOutputValue(), *getInputValue(0),
                            *getInputValue(1), *getInputGrad(0),
-                           *getInputGrad(1), kCosSimScale_);
+                           *getInputGrad(1), config_.cos_scale());
   }
 }
......
@@ -36,7 +36,7 @@ namespace paddle {
 class CosSimLayer : public Layer {
 public:
   explicit CosSimLayer(const LayerConfig& config)
-      : Layer(config), kCosSimScale_(5.0f) {}
+      : Layer(config) {}

   ~CosSimLayer() {}
@@ -44,8 +44,6 @@ public:
   void forward(PassType passType);
   void backward(const UpdateCallback& callback = nullptr);
-
-  const real kCosSimScale_;
 };

 }  // namespace paddle
@@ -22,6 +22,8 @@ find_python_module(pip REQUIRED)
 find_python_module(wheel REQUIRED)
 find_python_module(google.protobuf REQUIRED)

+add_subdirectory(paddle/trainer_config_helpers/tests)
+
 install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/dist/
     DESTINATION opt/paddle/share/wheels
 )
@@ -2264,6 +2264,9 @@ class ConvexCombinationLayer(LayerBase):
             name, 'convex_comb', size, inputs=inputs, device=device)
         config_assert(len(self.inputs) == 2,
                       'ConvexCombinationLayer must have 2 inputs')
+        config_assert(
+            size * self.get_input_layer(0).size == self.get_input_layer(1).size,
+            'Wrong input size for ConvexCombinationLayer')
         self.set_layer_size(size)

 @config_layer('interpolation')
@@ -2313,6 +2316,9 @@ class CosSimVecMatLayer(LayerBase):
         self.config.cos_scale = cos_scale
         config_assert(len(self.inputs) == 2,
                       'CosSimVecMatLayer must have 2 inputs')
+        config_assert(
+            size * self.get_input_layer(0).size == self.get_input_layer(1).size,
+            'Wrong input size for CosSimVecMatLayer')

 @config_layer('sampling_id')
 class SamplingIdLayer(LayerBase):
@@ -2361,6 +2367,7 @@ class CosSimLayer(LayerBase):
             self,
             name,
             inputs,
+            cos_scale=5,
             device=None):
         super(CosSimLayer, self).__init__(
             name, 'cos', 1, inputs=inputs, device=device)
@@ -2368,6 +2375,7 @@ class CosSimLayer(LayerBase):
         config_assert(
             self.get_input_layer(0).size == self.get_input_layer(1).size,
             'inputs of CosSimLayer must have same dim')
+        self.config.cos_scale = cos_scale

 @config_layer('tensor')
......
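Both new assertions encode the same shape contract between the two inputs; a plain-Python sketch of the check (hypothetical numbers, not Paddle code):

def check_input_sizes(out_size, weight_dim, vector_dim):
    # The second input must provide out_size values per weight element.
    assert out_size * weight_dim == vector_dim, 'Wrong input size'

check_input_sizes(out_size=3, weight_dim=5, vector_dim=15)  # passes: 3 * 5 == 15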
@@ -47,6 +47,7 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
            'BaseGeneratedInput', 'conv_operator', 'conv_shift_layer',
            'tensor_layer', 'selective_fc_layer', 'sampling_id_layer',
            'slope_intercept_layer', 'trans_full_matrix_projection',
+           'linear_comb_layer',
            'convex_comb_layer', 'ctc_layer', 'crf_layer', 'crf_decoding_layer',
            'cross_entropy_with_selfnorm', 'cross_entropy',
            'multi_binary_label_cross_entropy',
@@ -70,7 +71,8 @@ class LayerType(object):
     POOLING_AVG = 'average'
     FC_LAYER = "fc"
     COST = 'cost'
-    COSINE_SIM = 'cos_vm'
+    COSINE_SIM_VEC = 'cos_vm'
+    COSINE_SIM = 'cos'
     HSIGMOID = 'hsigmoid'
     CONV_LAYER = "conv"
     POOL_LAYER = "pool"
@@ -102,7 +104,7 @@ class LayerType(object):
     SEL_FC_LAYER = "selective_fc"
     SAMPLING_ID_LAYER = "sampling_id"
     SLOPE_INTERCEPT_LAYER = "slope_intercept"
-    CONVEX_COMBINATION_LAYER = "convex_comb"
+    LINEAR_COMBINATION_LAYER = "convex_comb"
     BLOCK_EXPAND = "blockexpand"
     CTC_LAYER = "ctc"
@@ -1171,13 +1173,16 @@ def power_layer(input, weight, name=None, layer_attr=None):
 @layer_support()
 def scaling_layer(input, weight, name=None, layer_attr=None):
     """
-    A layer for each row of a matrix, multiplying with an element of a vector.
+    A layer for multiplying the input vector by a weight scalar.

     .. math::
-       y.row[i] = w[i] * x.row[i]
+       y = w x

-    where :math:`x` is (batchSize x dataDim) input, :math:`w` is
-    (batchSize x 1) weight vector, and :math:`y` is (batchSize x dataDim) output.
+    where :math:`x` is the input of size dataDim, :math:`w` is the weight
+    of size 1, and :math:`y` is the output of size dataDim.
+
+    Note that the above computation is for one sample. Multiple samples are
+    processed in one batch.

     The example usage is:
@@ -1251,11 +1256,14 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None):
     .. math::
        similarity = cos(\\theta) = {\\mathbf{a} \\cdot \\mathbf{b}
-       \\over \\|\\mathbf{b}\\| \\|\\mathbf{b}\\|}
+       \\over \\|\\mathbf{a}\\| \\|\\mathbf{b}\\|}

-    And the input dimension is :math:`a \in R^M`, :math:`b \in R^{MN}`. The
-    similarity will be calculated N times by step M. The output dimension is
-    :math:`R^N`. The scale will be multiplied to similarity.
+    The size of :math:`a` is M, and the size of :math:`b` is M*N. The
+    similarity is calculated N times with step M, so the output size is N.
+    The similarity is multiplied by `scale`.
+
+    Note that the above computation is for one sample. Multiple samples are
+    processed in one batch.

     :param name: layer name
     :type name: basestring
@@ -1272,9 +1280,18 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None):
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
-    Layer(
-        name=name,
-        type=LayerType.COSINE_SIM,
-        size=size,
-        cos_scale=scale,
-        inputs=[a.name, b.name],
+    if size == 1:
+        Layer(
+            name=name,
+            type=LayerType.COSINE_SIM,
+            cos_scale=scale,
+            inputs=[a.name, b.name],
+            **ExtraLayerAttribute.to_kwargs(layer_attr)
+        )
+    else:
+        Layer(
+            name=name,
+            type=LayerType.COSINE_SIM_VEC,
+            size=size,
+            cos_scale=scale,
+            inputs=[a.name, b.name],
@@ -2911,29 +2928,37 @@ def slope_intercept_layer(input, name=None, slope=1.0, intercept=0.0):
 @wrap_name_default()
-def convex_comb_layer(input, size, name=None):
+def linear_comb_layer(weights, vectors, size, name=None):
     """
-    A layer for a convex weighted average of vectors, which takes two inputs.
-      - Input: a vector containing the convex weights (batchSize x weightDim),
-        and a matrix in a vector form (batchSize x (weightDim * dataDim)).
-      - Output: a vector (batchSize x dataDim).
+    A layer for a weighted sum of vectors, which takes two inputs.
+      - Input: the size of weights is M;
+               the size of vectors is M*N.
+      - Output: a vector of size N.

     .. math::

-       y[i][j] = \sum_{j}(x_{1}(i, j) * x_{2}(i, j + i * dataDim)),
-
-       i = 0,1,...,(batchSize-1); j = 0,1,...,(dataDim-1)
+       z(i) = \sum_{j=0}^{M-1} x(j) y(i + N j),
+
+    where :math:`0 \le i \le N-1`.
+
+    Or in matrix notation:
+
+    .. math::
+
+       z = x^\mathsf{T} Y

     In this formula:
-      - :math:`x_{1}`: the first input.
-      - :math:`x_{2}`: the second input.
-      - :math:`y`: the output.
+      - :math:`x`: the weights.
+      - :math:`y`: the vectors.
+      - :math:`z`: the output.
+
+    Note that the above computation is for one sample. Multiple samples are
+    processed in one batch.

     The simple usage is:

     .. code-block:: python

-       convex_comb = convex_comb_layer(input=inputs,
-                                       size=elem_dim)
+       linear_comb = linear_comb_layer(weights=weight, vectors=vectors,
+                                       size=elem_dim)

     :param input: The input layers.
@@ -2946,15 +2971,16 @@ def convex_comb_layer(input, size, name=None):
     :rtype: LayerOutput
     """
-    assert isinstance(input, list) or isinstance(input, tuple)
-    assert len(input) == 2
     Layer(
         name=name,
-        type=LayerType.CONVEX_COMBINATION_LAYER,
+        type=LayerType.LINEAR_COMBINATION_LAYER,
         size=size,
-        inputs=[Input(input[0].name), Input(input[1].name)],
+        inputs=[Input(weights.name), Input(vectors.name)],
     )
-    return LayerOutput(name, LayerType.CONVEX_COMBINATION_LAYER, input, size=size)
+    return LayerOutput(name, LayerType.LINEAR_COMBINATION_LAYER,
+                       [weights, vectors], size=size)
+
+convex_comb_layer = linear_comb_layer

 @wrap_name_default()
 def block_expand_layer(input,
......
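For intuition about the two code paths above, here is a minimal NumPy sketch of what a single sample computes (the reference function is illustrative, not the Paddle implementation; size > 1 corresponds to the 'cos_vm' path):

import numpy as np

def cos_sim_ref(a, b, scale=5.0, size=1):
    # size == 1: one cosine similarity between equal-length a and b.
    # size == N: b is split into N chunks of len(a); one similarity per chunk.
    chunks = b.reshape(size, -1)              # requires len(b) == size * len(a)
    dots = chunks @ a
    norms = np.linalg.norm(chunks, axis=1) * np.linalg.norm(a)
    return scale * dots / norms

a = np.array([1.0, 0.0, 0.0])
b = np.array([1.0, 0.0, 0.0, 0.0, 1.0, 0.0])
print(cos_sim_ref(a, b, size=2))              # [5. 0.]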
#################### test_config_parser #########################
add_test(NAME layers_test
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
python ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/layers_test.py
WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.config_parser import parse_config_and_serialize
if __name__ == '__main__':
parse_config_and_serialize(
'trainer_config_helpers/tests/layers_test_config.py', '')
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
num_classes = 5
x = data_layer(name="input1", size=3)
y = data_layer(name="input2", size=5)
x1 = fc_layer(input=x, size=5)
y1 = fc_layer(input=y, size=5)
y2 = fc_layer(input=y, size=15)
cos1 = cos_sim(a=x1, b=y1)
cos3 = cos_sim(a=x1, b=y2, size=3)
linear_comb = linear_comb_layer(weights=x1, vectors=y2, size=3)
out = fc_layer(input=[cos1, cos3, linear_comb],
size=num_classes,
act=SoftmaxActivation())
outputs(classification_cost(out, data_layer(name="label", size=num_classes)))
settings(
batch_size=10,
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25
)