diff --git a/paddle/gserver/layers/DotProdLayer.cpp b/paddle/gserver/layers/DotProdLayer.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ae71a3d4eb40419c72921cae58594fe0cec3734c
--- /dev/null
+++ b/paddle/gserver/layers/DotProdLayer.cpp
@@ -0,0 +1,95 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "Layer.h"
+#include "paddle/math/Matrix.h"
+#include "paddle/utils/Logging.h"
+#include "paddle/utils/Stat.h"
+
+namespace paddle {
+
+/**
+ * @brief A layer for computing the dot product of two vectors.
+ * Input1: vector (batchSize * dim)
+ * Input2: vector (batchSize * dim)
+ * Output: a matrix (batchSize * 1)
+ */
+
+class DotProdLayer : public Layer {
+public:
+  explicit DotProdLayer(const LayerConfig& config) : Layer(config) {}
+
+  ~DotProdLayer() {}
+
+  bool init(const LayerMap& layerMap,
+            const ParameterMap& parameterMap) override;
+
+  void forward(PassType passType) override;
+  void backward(const UpdateCallback& callback = nullptr) override;
+};
+
+REGISTER_LAYER(dot_prod, DotProdLayer);
+
+bool DotProdLayer::init(const LayerMap& layerMap,
+                        const ParameterMap& parameterMap) {
+  Layer::init(layerMap, parameterMap);
+
+  CHECK_EQ(inputLayers_.size(), 2U);
+  CHECK_EQ(getSize(), 1UL) << "The output size of this layer must be 1.";
+
+  return true;
+}
+
+void DotProdLayer::forward(PassType passType) {
+  Layer::forward(passType);
+
+  MatrixPtr inV0 = getInputValue(0);
+  MatrixPtr inV1 = getInputValue(1);
+
+  size_t batchSize = inV0->getHeight();
+  CHECK_EQ(inV1->getHeight(), batchSize);
+
+  {
+    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
+    reserveOutput(batchSize, 1);
+  }
+
+  MatrixPtr outV = getOutputValue();
+  {
+    REGISTER_TIMER_INFO("FwDotProdTimer", getName().c_str());
+    outV->sumOfProducts(*inV0, *inV1, 1, 0);
+  }
+}
+
+void DotProdLayer::backward(const UpdateCallback& callback) {
+  MatrixPtr inV0 = getInputValue(0);
+  MatrixPtr inV1 = getInputValue(1);
+  MatrixPtr outG = getOutputGrad();
+  MatrixPtr inG0 = getInputGrad(0);
+  MatrixPtr inG1 = getInputGrad(1);
+
+  {
+    REGISTER_TIMER_INFO("BwDotProdTimer", getName().c_str());
+
+    if (inG0) {
+      inG0->addRowScale(0, *inV1, *outG);
+    }
+
+    if (inG1) {
+      inG1->addRowScale(0, *inV0, *outG);
+    }
+  }
+}
+
+}  // namespace paddle
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 3517d293e3c901caaa19952b04e56d1ef0d2b46e..de2db0b3f7604ee9108dd5ffab7ab9d04e4083d7 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -1081,6 +1081,21 @@ TEST(Layer, InterpolationLayer) {
   }
 }
 
+TEST(Layer, DotProdLayer) {
+  TestConfig config;
+  config.layerConfig.set_type("dot_prod");
+  config.layerConfig.set_size(1);
+
+  config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0});
+  config.layerConfig.add_inputs();
+  config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0});
+  config.layerConfig.add_inputs();
+
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config, "dot_prod", 100, false, useGpu);
+  }
+}
+
 TEST(Layer, OuterProdLayer) {
   TestConfig config;
   config.layerConfig.set_type("out_prod");
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 5bd68e211ac1c8e05f40dc3ca37eef99f32af47f..6d1cc5ad709146a2d1d13496e27cb1847c798d58 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -3209,6 +3209,15 @@ class SubNestedSequenceLayer(LayerBase):
         self.set_layer_size(size)
 
 
+@config_layer('dot_prod')
+class DotProdLayer(LayerBase):
+    def __init__(self, name, inputs, device=None):
+        super(DotProdLayer, self).__init__(
+            name, 'dot_prod', 0, inputs, device=device)
+        config_assert(len(inputs) == 2, 'DotProdLayer must have 2 inputs')
+        self.set_layer_size(1)
+
+
 @config_layer('out_prod')
 class OuterProdLayer(LayerBase):
     def __init__(self, name, inputs, device=None):
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index a02eba007ddf929ff92df995df253f5a386bac7b..388535d53a9d1d6747ac89cb698f3a1f496b5f7c 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -115,6 +115,7 @@ __all__ = [
     'huber_classification_cost',
     'block_expand_layer',
     'maxout_layer',
+    'dot_prod_layer',
     'out_prod_layer',
     'printer_layer',
     'print_layer',
@@ -197,6 +198,7 @@ class LayerType(object):
     SCALING_LAYER = 'scaling'
     TRANS_LAYER = 'trans'
     ROTATE_LAYER = 'rotate'
+    DOT_PROD_LAYER = 'dot_prod'
     OUT_PROD_LAYER = 'out_prod'
     FEATURE_MAP_EXPAND_LAYER = 'featmap_expand'
 
@@ -4140,6 +4142,45 @@ def maxid_layer(input, name=None, layer_attr=None):
         size=l.config.size)
 
 
+@wrap_name_default()
+def dot_prod_layer(input1, input2, name=None, layer_attr=None):
+    """
+    A layer for computing the dot product of two vectors.
+
+    The example usage is:
+
+    .. code-block:: python
+
+        dot_prod = dot_prod_layer(input1=vec1, input2=vec2)
+
+    :param name: The name of this layer. It is optional.
+    :type name: basestring
+    :param input1: The first input layer.
+    :type input1: LayerOutput
+    :param input2: The second input layer.
+    :type input2: LayerOutput
+    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
+                       details.
+    :type layer_attr: ExtraLayerAttribute
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+    assert isinstance(input1, LayerOutput)
+    assert isinstance(input2, LayerOutput)
+    assert input1.size == input2.size, ("Two inputs should have the same size.")
+
+    l = Layer(
+        name=name,
+        type=LayerType.DOT_PROD_LAYER,
+        inputs=[input1.name, input2.name],
+        **ExtraLayerAttribute.to_kwargs(layer_attr))
+    return LayerOutput(
+        name=name,
+        layer_type=LayerType.DOT_PROD_LAYER,
+        parents=[input1, input2],
+        size=l.config.size)
+
+
 @wrap_name_default()
 def out_prod_layer(input1, input2, name=None, layer_attr=None):
     """
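
Usage note (not part of the patch): the sketch below shows one way the new dot_prod_layer could be wired into a v1 trainer config once this change is merged. The layer names, sizes, and activation are illustrative only; data_layer, fc_layer, and outputs are the existing config helpers.

    # Illustrative config sketch; names and sizes are made up for this example.
    from paddle.trainer_config_helpers import *

    # Two 10-dimensional inputs, matching the dimensions used in the
    # gradient test above.
    vec1 = data_layer(name='vec1', size=10)
    vec2 = data_layer(name='vec2', size=10)

    # Project both inputs into a shared space before comparing them.
    proj1 = fc_layer(input=vec1, size=64, act=TanhActivation())
    proj2 = fc_layer(input=vec2, size=64, act=TanhActivation())

    # The new layer: one dot-product score per example, output shape (batchSize x 1).
    score = dot_prod_layer(input1=proj1, input2=proj2)

    outputs(score)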