From 947b6a77ce08c1ca2dc386514f0e97eb75ade91a Mon Sep 17 00:00:00 2001
From: wangmeng28 <wangmeng28@baidu.com>
Date: Tue, 17 Oct 2017 00:26:53 +0800
Subject: [PATCH] Implement factorization machine layer

---
 .../layers/FactorizationMachineLayer.cpp      | 62 +++++++++++++++++--
 .../layers/FactorizationMachineLayer.h        | 12 ++++
 paddle/gserver/tests/test_LayerGrad.cpp       |  5 +-
 3 files changed, 73 insertions(+), 6 deletions(-)
diff --git a/paddle/gserver/layers/FactorizationMachineLayer.cpp b/paddle/gserver/layers/FactorizationMachineLayer.cpp
index 5456bf2601e..09128eeeef1 100644
--- a/paddle/gserver/layers/FactorizationMachineLayer.cpp
+++ b/paddle/gserver/layers/FactorizationMachineLayer.cpp
@@ -33,7 +33,10 @@ bool FactorizationMachineLayer::init(const LayerMap& layerMap,
   /* initialize the latentVectors_ */
   CHECK_EQ(inputLayers_.size(), 1UL);
   size_t height = inputLayers_[0]->getSize();
-  latentVectors_.reset(new Weight(height, factorSize_, parameters_[0]));
+  latentVectors_ =
+      std::unique_ptr<Weight>(new Weight(height, factorSize_, parameters_[0]));
+
+  v2_ = latentVectors_->getW()->clone(0, 0, useGpu_);
 
   return true;
 }
@@ -41,14 +44,28 @@ bool FactorizationMachineLayer::init(const LayerMap& layerMap,
 void FactorizationMachineLayer::forward(PassType passType) {
   Layer::forward(passType);
 
-  auto input = getInput(0);
+  const MatrixPtr& inputV = getInputValue(0);
 
-  int batchSize = input.getBatchSize();
-  int size = getSize();
+  size_t batchSize = inputV->getHeight();
+  size_t size = getSize();
   reserveOutput(batchSize, size);
 
   MatrixPtr outV = getOutputValue();
 
+  Matrix::resizeOrCreate(tmpMul_, batchSize, factorSize_, false, useGpu_);
+  Matrix::resizeOrCreate(tmpOut_, batchSize, factorSize_, false, useGpu_);
+
+  REGISTER_TIMER_INFO("FwMulTimer", getName().c_str());
+  tmpMul_->mul(*inputV, *latentVectors_->getW());
+  tmpOut_->pow2(*tmpMul_, 2);
+  outV->sumRows(*tmpOut_, 0.5, 0);
+
+  x2_ = inputV->clone(0, 0, useGpu_);
+  x2_->pow2(*inputV, 2);
+  v2_->pow2(*latentVectors_->getW(), 2);
+  tmpOut_->mul(*x2_, *v2_);
+  outV->sumRows(*tmpOut_, -0.5, 1.0);
+
   /* activation */ {
     REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
     forwardActivation();
@@ -60,6 +77,43 @@ void FactorizationMachineLayer::backward(const UpdateCallback& callback) {
     REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
     backwardActivation();
   }
+
+  const MatrixPtr& inputV = getInputValue(0);
+  const MatrixPtr& oGrad = getOutputGrad();
+
+  MatrixPtr tmpSum =
+      Matrix::create(1, latentVectors_->getW()->getHeight(), false, useGpu_);
+  MatrixPtr tmpSum_T = Matrix::create(tmpSum->getRowBuf(0),
+                                      latentVectors_->getW()->getHeight(),
+                                      1,
+                                      false,
+                                      useGpu_);
+
+  /* Calculate the gradients of the latentVectors_ matrix */
+  if (latentVectors_->getWGrad()) {
+    MatrixPtr tmpIn = inputV->clone(0, 0, useGpu_);
+    tmpIn->rowScale(0, *inputV, *oGrad);
+
+    latentVectors_->getWGrad()->mul(*tmpIn->getTranspose(), *tmpMul_, 1, 1);
+
+    tmpIn->rowScale(0, *x2_, *oGrad);
+    tmpSum->sumCols(*tmpIn, -1, 0);
+    latentVectors_->getWGrad()->addRowScale(
+        0, *latentVectors_->getW(), *tmpSum_T);
+
+    /* Increasing the number of gradient */
+    latentVectors_->getParameterPtr()->incUpdate(callback);
+  }
+
+  /* Calculate the input layers gradient */
+  MatrixPtr inGrad = getInputGrad(0);
+  if (inGrad != NULL) {
+    MatrixPtr latentVectors_T = latentVectors_->getW()->getTranspose();
+    inGrad->mul(*tmpMul_, *latentVectors_T, 1, 1);
+    tmpSum_T->sumRows(*v2_, -1, 0);
+    inGrad->addColScale(0, *inputV, *tmpSum);
+    inGrad->rowScale(0, *inGrad, *oGrad);
+  }
 }
 
 }  // namespace paddle
diff --git a/paddle/gserver/layers/FactorizationMachineLayer.h b/paddle/gserver/layers/FactorizationMachineLayer.h
index e7807c8986c..7cf064690ff 100644
--- a/paddle/gserver/layers/FactorizationMachineLayer.h
+++ b/paddle/gserver/layers/FactorizationMachineLayer.h
@@ -40,10 +40,22 @@ namespace paddle {
 class FactorizationMachineLayer : public Layer {
 protected:
   /// The latent vectors, shape: (size, factorSize_)
+  /// Each row of the latentVectors_ matrix is the latent vector
+  /// corresponding to one input feature dimension
   std::unique_ptr<Weight> latentVectors_;
   /// The hyperparameter that defines the dimensionality of the factorization
   size_t factorSize_;
 
+private:
+  /// The result of input matrix * latent vector matrix that will be used in
+  /// both forward and backward step
+  MatrixPtr tmpMul_;
+  MatrixPtr tmpOut_;
+  /// Store the square values of the latent vectors matrix
+  MatrixPtr v2_;
+  /// Store the square values of input matrix
+  MatrixPtr x2_;
+
 public:
   explicit FactorizationMachineLayer(const LayerConfig& config)
       : Layer(config) {}
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index eea884cb502..21e8fb7eed1 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -2363,8 +2363,9 @@ void testFactorizationMachineLayer(InputType type, bool useGpu) {
   TestConfig config;
   config.layerConfig.set_type("factorization_machine");
   config.layerConfig.set_factor_size(FACTOR_SIZE);
-  config.biasSize = 1;
-  config.inputDefs.push_back({type, "layer_0", 8192, 0});
+  config.layerConfig.set_size(1);
+  config.biasSize = 0;
+  config.inputDefs.push_back({type, "layer_0", 1024, 10240});
   config.layerConfig.add_inputs();
   testLayerGrad(config, "factorization_machine", 16, false, useGpu, false);
 }
-- 
GitLab