From 1644c72accb59c325c7e17bb1bb46e03391a4c27 Mon Sep 17 00:00:00 2001
From: wangmeng28
Date: Wed, 11 Oct 2017 16:07:30 +0800
Subject: [PATCH] Add framework of the factorization machine layer

---
 doc/api/v2/config/layer.rst                        | 15 +++--
 .../layers/FactorizationMachineLayer.cpp           | 65 +++++++++++++++++++
 .../layers/FactorizationMachineLayer.h             | 59 +++++++++++++++++
 paddle/gserver/tests/test_LayerGrad.cpp            | 19 ++++++
 proto/ModelConfig.proto                            |  3 +
 python/paddle/trainer/config_parser.py             | 15 +++++
 .../paddle/trainer_config_helpers/layers.py        | 65 +++++++++++++++++++
 .../tests/configs/file_list.sh                     |  3 +-
 .../test_factorization_machine.protostr            | 39 +++++++++++
 .../configs/test_factorization_machine.py          |  9 +++
 10 files changed, 287 insertions(+), 5 deletions(-)
 create mode 100644 paddle/gserver/layers/FactorizationMachineLayer.cpp
 create mode 100644 paddle/gserver/layers/FactorizationMachineLayer.h
 create mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_factorization_machine.protostr
 create mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_factorization_machine.py

diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst
index d4e9d53e5c0..89d6953c330 100644
--- a/doc/api/v2/config/layer.rst
+++ b/doc/api/v2/config/layer.rst
@@ -54,7 +54,7 @@ img_conv
 
 .. _api_v2.layer_context_projection:
 
-context_projection 
+context_projection
 ------------------
 .. autoclass:: paddle.v2.layer.context_projection
     :noindex:
@@ -70,7 +70,7 @@ Image Pooling Layer
 img_pool
 --------
 .. autoclass:: paddle.v2.layer.img_pool
-    :noindex: 
+    :noindex:
 
 spp
 ---
@@ -99,7 +99,7 @@ sum_to_one_norm
 ---------------
 .. autoclass:: paddle.v2.layer.sum_to_one_norm
     :noindex:
- 
+
 cross_channel_norm
 ------------------
 .. autoclass:: paddle.v2.layer.cross_channel_norm
@@ -109,7 +109,7 @@ row_l2_norm
 -----------
 .. autoclass:: paddle.v2.layer.row_l2_norm
     :noindex:
- 
+
 Recurrent Layers
 ================
 
@@ -395,6 +395,13 @@ multiplex
 .. autoclass:: paddle.v2.layer.multiplex
     :noindex:
 
+Factorization Machine Layer
+============================
+
+factorization_machine
+---------------------
+.. autoclass:: paddle.v2.layer.factorization_machine
+    :noindex:
 
 Slicing and Joining Layers
 ==========================
diff --git a/paddle/gserver/layers/FactorizationMachineLayer.cpp b/paddle/gserver/layers/FactorizationMachineLayer.cpp
new file mode 100644
index 00000000000..5456bf2601e
--- /dev/null
+++ b/paddle/gserver/layers/FactorizationMachineLayer.cpp
@@ -0,0 +1,65 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "FactorizationMachineLayer.h"
+#include <algorithm>
+#include <vector>
+#include "paddle/math/SparseMatrix.h"
+#include "paddle/utils/Logging.h"
+#include "paddle/utils/Stat.h"
+
+namespace paddle {
+
+REGISTER_LAYER(factorization_machine, FactorizationMachineLayer);
+
+bool FactorizationMachineLayer::init(const LayerMap& layerMap,
+                                     const ParameterMap& parameterMap) {
+  /* Initialize the basic parent class */
+  Layer::init(layerMap, parameterMap);
+
+  factorSize_ = config_.factor_size();
+
+  /* initialize the latentVectors_ */
+  CHECK_EQ(inputLayers_.size(), 1UL);
+  size_t height = inputLayers_[0]->getSize();
+  latentVectors_.reset(new Weight(height, factorSize_, parameters_[0]));
+
+  return true;
+}
+
+void FactorizationMachineLayer::forward(PassType passType) {
+  Layer::forward(passType);
+
+  auto input = getInput(0);
+
+  int batchSize = input.getBatchSize();
+  int size = getSize();
+  reserveOutput(batchSize, size);
+
+  MatrixPtr outV = getOutputValue();
+
+  /* activation */ {
+    REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
+    forwardActivation();
+  }
+}
+
+void FactorizationMachineLayer::backward(const UpdateCallback& callback) {
+  /* Do derivation */ {
+    REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
+    backwardActivation();
+  }
+}
+
+}  // namespace paddle
diff --git a/paddle/gserver/layers/FactorizationMachineLayer.h b/paddle/gserver/layers/FactorizationMachineLayer.h
new file mode 100644
index 00000000000..e7807c8986c
--- /dev/null
+++ b/paddle/gserver/layers/FactorizationMachineLayer.h
@@ -0,0 +1,59 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "Layer.h"
+#include "paddle/math/Matrix.h"
+#include "paddle/utils/ThreadLocal.h"
+
+namespace paddle {
+/**
+ * @brief The Factorization Machine models pairwise (order-2) feature
+ * interactions as the inner product of the learned latent vectors
+ * corresponding to each input feature.
+ *
+ * The Factorization Machine can effectively capture feature interactions
+ * especially when the input is sparse. While in principle FM can model
+ * higher-order feature interactions, in practice usually only order-2
+ * interactions are considered. The Factorization Machine Layer here only
+ * computes the order-2 interactions with the formula:
+ *
+ * \f[
+ *     y = \sum_{i=1}^{n-1}\sum_{j=i+1}^n\langle v_i, v_j \rangle x_i x_j
+ * \f]
+ *
+ * The config file api is factorization_machine.
+ */
+
+class FactorizationMachineLayer : public Layer {
+protected:
+  /// The latent vectors, shape: (size, factorSize_)
+  std::unique_ptr<Weight> latentVectors_;
+  /// The hyperparameter that defines the dimensionality of the factorization
+  size_t factorSize_;
+
+public:
+  explicit FactorizationMachineLayer(const LayerConfig& config)
+      : Layer(config) {}
+  ~FactorizationMachineLayer() {}
+
+  bool init(const LayerMap& layerMap,
+            const ParameterMap& parameterMap) override;
+
+  void forward(PassType passType) override;
+  void backward(const UpdateCallback& callback = nullptr) override;
+};
+
+}  // namespace paddle
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 90a33528988..542db5ee5b7 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -2359,6 +2359,25 @@ TEST(Layer, ScaleShiftLayer) {
   }
 }
 
+void testFactorizationMachineLayer(InputType type, bool useGpu) {
+  const int FACTOR_SIZE = 10;
+  TestConfig config;
+  config.layerConfig.set_type("factorization_machine");
+  config.layerConfig.set_factor_size(FACTOR_SIZE);
+  config.biasSize = 1;
+  config.inputDefs.push_back({type, "layer_0", 8192, 0});
+  config.layerConfig.add_inputs();
+  testLayerGrad(config, "factorization_machine", 16, false, useGpu, false);
+}
+
+TEST(Layer, FactorizationMachineLayer) {
+  testFactorizationMachineLayer(INPUT_DATA, false);
+  testFactorizationMachineLayer(INPUT_SPARSE_FLOAT_VALUE_DATA, false);
+#ifdef PADDLE_WITH_CUDA
+  testFactorizationMachineLayer(INPUT_DATA, true);
+#endif
+}
+
 int main(int argc, char** argv) {
   testing::InitGoogleTest(&argc, argv);
   initMain(argc, argv);
diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto
index ebf0911d6ea..0d2140ccf93 100644
--- a/proto/ModelConfig.proto
+++ b/proto/ModelConfig.proto
@@ -525,6 +525,9 @@ message LayerConfig {
 
   // for switch order layer
   optional ReshapeConfig reshape_conf = 59;
+
+  // for factorization machine layer
+  optional uint32 factor_size = 60;
 }
 
 message EvaluatorConfig {
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 098a51ab879..07b3ff66dc7 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -3780,6 +3780,21 @@ class SwitchOrderLayer(LayerBase):
         self.config.reshape_conf.width_axis.extend(reshape['width'])
 
 
+@config_layer('factorization_machine')
+class FactorizationMachineLayer(LayerBase):
+    def __init__(self, name, inputs, factor_size, **xargs):
+        super(FactorizationMachineLayer, self).__init__(
+            name, 'factorization_machine', size=1, inputs=inputs, **xargs)
+        config_assert(
+            len(self.inputs) == 1,
+            'factorization machine layer must have one and only one input.')
+        self.config.factor_size = factor_size
+        input_layer = self.get_input_layer(0)
+        psize = input_layer.size * factor_size
+        dims = [input_layer.size, 1]
+        self.create_input_parameter(0, psize, dims)
+
+
 # Deprecated, use a new layer specific class instead
 @config_func
 def Layer(name, type, **xargs):
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index d37f29d2c4b..e6348dca2a8 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -143,6 +143,7 @@ __all__ = [
     'scale_shift_layer',
     'img_conv3d_layer',
     'resize_layer',
+    'factorization_machine',
 ]
 
 
@@ -253,6 +254,8 @@ class LayerType(object):
 
     RESIZE = 'resize'
 
+    FACTORIZATION_MACHINE = 'factorization_machine'
+
     @staticmethod
     def is_layer_type(type_name):
         """
@@ -6955,3 +6958,65 @@ def resize_layer(input, size, name=None):
     """
     Layer(name=name, type=LayerType.RESIZE, inputs=Input(input.name), size=size)
     return LayerOutput(name, LayerType.RESIZE, parents=[input], size=input.size)
+
+
+@wrap_name_default()
+@wrap_act_default(act=LinearActivation())
+@wrap_param_attr_default()
+@layer_support()
+def factorization_machine(input,
+                          factor_size,
+                          act=None,
+                          name=None,
+                          param_attr=None,
+                          layer_attr=None):
+    """
+    The Factorization Machine models pairwise feature interactions as the
+    inner product of the learned latent vectors corresponding to each input
+    feature.
+
+    The Factorization Machine can effectively capture feature interactions
+    especially when the input is sparse. In practice, usually only order-2
+    feature interactions are considered, computed with the formula:
+
+    .. math::
+
+        y = \sum_{i=1}^{n-1}\sum_{j=i+1}^n\langle v_i, v_j \rangle x_i x_j
+
+    Note:
+        x is the input vector with size n. V is the factor matrix. Each row
+        of V is the latent vector v_i corresponding to one input dimension,
+        and the size k of each latent vector is given by factor_size.
+
+    .. code-block:: python
+
+       factor_machine = factorization_machine(input=input_layer, factor_size=10)
+
+    :param input: The input layer.
+    :type input: LayerOutput
+    :param factor_size: The hyperparameter that defines the dimensionality of
+                        the latent vectors.
+    :type factor_size: int
+    :param act: Activation type. LinearActivation is the default.
+    :type act: BaseActivation
+    :param param_attr: The parameter attribute. If None, the latent vectors
+                       will be initialized smartly. It is better to set it
+                       explicitly.
+    :type param_attr: ParameterAttribute
+    :param layer_attr: Extra layer config.
+    :type layer_attr: ExtraLayerAttribute|None
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+
+    """
+    assert isinstance(input, LayerOutput)
+    assert factor_size > 0, "the factor_size must be greater than 0."
+
+    Layer(
+        inputs=[Input(input.name, **param_attr.attr)],
+        name=name,
+        factor_size=factor_size,
+        type=LayerType.FACTORIZATION_MACHINE,
+        active_type=act.name,
+        **ExtraLayerAttribute.to_kwargs(layer_attr))
+    return LayerOutput(
+        name, LayerType.FACTORIZATION_MACHINE, input, activation=act, size=1)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
index 6a4550c2097..40bbb04bd49 100755
--- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
+++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
@@ -10,6 +10,7 @@ test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_la
 test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer
 test_kmax_seq_socre_layer test_sub_nested_seq_select_layer test_scale_shift_layer
 test_seq_slice_layer test_cross_entropy_over_beam test_pooling3D_layer
-test_conv3d_layer test_deconv3d_layer test_BatchNorm3D test_resize_layer)
+test_conv3d_layer test_deconv3d_layer test_BatchNorm3D test_resize_layer
+test_factorization_machine)
 
 export whole_configs=(test_split_datasource)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_factorization_machine.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_factorization_machine.protostr
new file mode 100644
index 00000000000..585a5c7b23d
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_factorization_machine.protostr
@@ -0,0 +1,39 @@
+type: "nn"
+layers {
+  name: "data"
+  type: "data"
+  size: 1024
+  active_type: ""
+}
+layers {
+  name: "__factorization_machine_0__"
+  type: "factorization_machine"
+  size: 1
+  active_type: ""
+  inputs {
+    input_layer_name: "data"
+    input_parameter_name: "___factorization_machine_0__.w0"
+  }
+  factor_size: 10
+}
+parameters {
+  name: "___factorization_machine_0__.w0"
+  size: 10240
+  initial_mean: 0.0
+  initial_std: 0.03125
+  dims: 1024
+  dims: 1
+  initial_strategy: 0
+  initial_smart: true
+}
+input_layer_names: "data"
+output_layer_names: "__factorization_machine_0__"
+sub_models {
+  name: "root"
+  layer_names: "data"
+  layer_names: "__factorization_machine_0__"
+  input_layer_names: "data"
+  output_layer_names: "__factorization_machine_0__"
+  is_recurrent_layer_group: false
+}
+
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_factorization_machine.py b/python/paddle/trainer_config_helpers/tests/configs/test_factorization_machine.py
new file mode 100644
index 00000000000..62ceb359cf8
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_factorization_machine.py
@@ -0,0 +1,9 @@
+from paddle.trainer_config_helpers import *
+
+settings(batch_size=1000, learning_rate=1e-5)
+
+data = data_layer(name='data', size=1024)
+
+fm = factorization_machine(input=data, factor_size=10)
+
+outputs(fm)
--
GitLab
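Note on the intended computation: forward() and backward() above are still stubs, since this commit only adds the framework. The order-2 term in the docstring formula can be evaluated in O(k*n) rather than O(k*n^2) time via the standard factorization machine identity

    y = 1/2 * sum_f [ (sum_i v_{i,f} x_i)^2 - sum_i (v_{i,f} x_i)^2 ]

A minimal numpy sketch of that identity follows. It is illustrative only, not PaddlePaddle code, and the name fm_order2 is hypothetical:

    import numpy as np

    def fm_order2(x, v):
        # x: input vector, shape (n,); v: latent factor matrix, shape (n, k),
        # where row i of v is the latent vector v_i from the formula above.
        xv = x[:, None] * v                  # (n, k): row i holds x_i * v_i
        sum_sq = np.square(xv.sum(axis=0))   # per factor f: (sum_i x_i v_{i,f})^2
        sq_sum = np.square(xv).sum(axis=0)   # per factor f: sum_i (x_i v_{i,f})^2
        return 0.5 * (sum_sq - sq_sum).sum()

    # Sanity check against the brute-force double sum from the docstring formula.
    rng = np.random.RandomState(0)
    x, v = rng.randn(8), rng.randn(8, 3)
    brute = sum(v[i].dot(v[j]) * x[i] * x[j]
                for i in range(8) for j in range(i + 1, 8))
    assert np.isclose(fm_order2(x, v), brute)

The same reformulation is what makes the layer practical for sparse, high-dimensional inputs, since only the nonzero x_i contribute to each per-factor sum.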