From 947b6a77ce08c1ca2dc386514f0e97eb75ade91a Mon Sep 17 00:00:00 2001 From: wangmeng28 Date: Tue, 17 Oct 2017 00:26:53 +0800 Subject: [PATCH] Implement factorization machine layer --- .../layers/FactorizationMachineLayer.cpp | 62 +++++++++++++++++-- .../layers/FactorizationMachineLayer.h | 12 ++++ paddle/gserver/tests/test_LayerGrad.cpp | 5 +- 3 files changed, 73 insertions(+), 6 deletions(-) diff --git a/paddle/gserver/layers/FactorizationMachineLayer.cpp b/paddle/gserver/layers/FactorizationMachineLayer.cpp index 5456bf2601e..09128eeeef1 100644 --- a/paddle/gserver/layers/FactorizationMachineLayer.cpp +++ b/paddle/gserver/layers/FactorizationMachineLayer.cpp @@ -33,7 +33,10 @@ bool FactorizationMachineLayer::init(const LayerMap& layerMap, /* initialize the latentVectors_ */ CHECK_EQ(inputLayers_.size(), 1UL); size_t height = inputLayers_[0]->getSize(); - latentVectors_.reset(new Weight(height, factorSize_, parameters_[0])); + latentVectors_ = + std::unique_ptr(new Weight(height, factorSize_, parameters_[0])); + + v2_ = latentVectors_->getW()->clone(0, 0, useGpu_); return true; } @@ -41,14 +44,28 @@ bool FactorizationMachineLayer::init(const LayerMap& layerMap, void FactorizationMachineLayer::forward(PassType passType) { Layer::forward(passType); - auto input = getInput(0); + const MatrixPtr& inputV = getInputValue(0); - int batchSize = input.getBatchSize(); - int size = getSize(); + size_t batchSize = inputV->getHeight(); + size_t size = getSize(); reserveOutput(batchSize, size); MatrixPtr outV = getOutputValue(); + Matrix::resizeOrCreate(tmpMul_, batchSize, factorSize_, false, useGpu_); + Matrix::resizeOrCreate(tmpOut_, batchSize, factorSize_, false, useGpu_); + + REGISTER_TIMER_INFO("FwMulTimer", getName().c_str()); + tmpMul_->mul(*inputV, *latentVectors_->getW()); + tmpOut_->pow2(*tmpMul_, 2); + outV->sumRows(*tmpOut_, 0.5, 0); + + x2_ = inputV->clone(0, 0, useGpu_); + x2_->pow2(*inputV, 2); + v2_->pow2(*latentVectors_->getW(), 2); + tmpOut_->mul(*x2_, *v2_); 
+ outV->sumRows(*tmpOut_, -0.5, 1.0); + /* activation */ { REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str()); forwardActivation(); @@ -60,6 +77,43 @@ void FactorizationMachineLayer::backward(const UpdateCallback& callback) { REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str()); backwardActivation(); } + + const MatrixPtr& inputV = getInputValue(0); + const MatrixPtr& oGrad = getOutputGrad(); + + MatrixPtr tmpSum = + Matrix::create(1, latentVectors_->getW()->getHeight(), false, useGpu_); + MatrixPtr tmpSum_T = Matrix::create(tmpSum->getRowBuf(0), + latentVectors_->getW()->getHeight(), + 1, + false, + useGpu_); + + /* Calculate the gradients of the latentVectors_ matrix */ + if (latentVectors_->getWGrad()) { + MatrixPtr tmpIn = inputV->clone(0, 0, useGpu_); + tmpIn->rowScale(0, *inputV, *oGrad); + + latentVectors_->getWGrad()->mul(*tmpIn->getTranspose(), *tmpMul_, 1, 1); + + tmpIn->rowScale(0, *x2_, *oGrad); + tmpSum->sumCols(*tmpIn, -1, 0); + latentVectors_->getWGrad()->addRowScale( + 0, *latentVectors_->getW(), *tmpSum_T); + + /* Increasing the number of gradient */ + latentVectors_->getParameterPtr()->incUpdate(callback); + } + + /* Calculate the input layers gradient */ + MatrixPtr inGrad = getInputGrad(0); + if (inGrad != NULL) { + MatrixPtr latentVectors_T = latentVectors_->getW()->getTranspose(); + inGrad->mul(*tmpMul_, *latentVectors_T, 1, 1); + tmpSum_T->sumRows(*v2_, -1, 0); + inGrad->addColScale(0, *inputV, *tmpSum); + inGrad->rowScale(0, *inGrad, *oGrad); + } } } // namespace paddle diff --git a/paddle/gserver/layers/FactorizationMachineLayer.h b/paddle/gserver/layers/FactorizationMachineLayer.h index e7807c8986c..7cf064690ff 100644 --- a/paddle/gserver/layers/FactorizationMachineLayer.h +++ b/paddle/gserver/layers/FactorizationMachineLayer.h @@ -40,10 +40,22 @@ namespace paddle { class FactorizationMachineLayer : public Layer { protected: /// The latent vectors, shape: (size, factorSize_) + /// Each row of the latentVectors_ matrix is the latent 
vector + /// corresponding to one input feature dimension std::unique_ptr latentVectors_; /// The hyperparameter that defines the dimensionality of the factorization size_t factorSize_; +private: + /// The result of input matrix * latent vector matrix that will be used in + /// both the forward and backward steps + MatrixPtr tmpMul_; + MatrixPtr tmpOut_; + /// Store the square values of the latent vectors matrix + MatrixPtr v2_; + /// Store the square values of input matrix + MatrixPtr x2_; + public: explicit FactorizationMachineLayer(const LayerConfig& config) : Layer(config) {} diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index eea884cb502..21e8fb7eed1 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -2363,8 +2363,9 @@ void testFactorizationMachineLayer(InputType type, bool useGpu) { TestConfig config; config.layerConfig.set_type("factorization_machine"); config.layerConfig.set_factor_size(FACTOR_SIZE); - config.biasSize = 1; - config.inputDefs.push_back({type, "layer_0", 8192, 0}); + config.layerConfig.set_size(1); + config.biasSize = 0; + config.inputDefs.push_back({type, "layer_0", 1024, 10240}); config.layerConfig.add_inputs(); testLayerGrad(config, "factorization_machine", 16, false, useGpu, false); } -- GitLab