Commit 7a1a5863 authored by: W wangmeng28

Update variable names and docs for factorization machine layer

Parent e5135e8b
@@ -32,12 +32,10 @@ bool FactorizationMachineLayer::init(const LayerMap& layerMap,
   /* initialize the latentVectors_ */
   CHECK_EQ(inputLayers_.size(), 1UL);
-  size_t height = inputLayers_[0]->getSize();
-  CHECK_EQ(parameters_[0]->getSize(), height * factorSize_);
-  latentVectors_ =
-      std::unique_ptr<Weight>(new Weight(height, factorSize_, parameters_[0]));
-  v2_ = Matrix::create(height, factorSize_, false, useGpu_);
+  size_t inputSize = inputLayers_[0]->getSize();
+  CHECK_EQ(parameters_[0]->getSize(), inputSize * factorSize_);
+  latentVectors_ = std::unique_ptr<Weight>(
+      new Weight(inputSize, factorSize_, parameters_[0]));

   return true;
 }
@@ -48,79 +46,85 @@ void FactorizationMachineLayer::forward(PassType passType) {
   const MatrixPtr& inputV = getInputValue(0);

   size_t batchSize = inputV->getHeight();
-  size_t size = getSize();
-  reserveOutput(batchSize, size);
+  size_t outputSize = getSize();
+  size_t inputSize = inputLayers_[0]->getSize();
+  reserveOutput(batchSize, outputSize);

   MatrixPtr outV = getOutputValue();

-  Matrix::resizeOrCreate(tmpMul_, batchSize, factorSize_, false, useGpu_);
+  Matrix::resizeOrCreate(
+      latentVectorsSquare_, inputSize, factorSize_, false, useGpu_);
+  Matrix::resizeOrCreate(
+      inputMulFactor_, batchSize, factorSize_, false, useGpu_);
   Matrix::resizeOrCreate(tmpOut_, batchSize, factorSize_, false, useGpu_);

-  REGISTER_TIMER_INFO("FwMulTimer", getName().c_str());
-  tmpMul_->mul(*inputV, *latentVectors_->getW());
-  tmpMul_->square2(*tmpOut_);
+  REGISTER_TIMER_INFO("InputMulFactorTimer", getName().c_str());
+  inputMulFactor_->mul(*inputV, *latentVectors_->getW());
+  inputMulFactor_->square2(*tmpOut_);
   outV->sumRows(*tmpOut_, 0.5, 0);

-  x2_ = inputV->clone(0, 0, useGpu_);
-  if (dynamic_cast<CpuSparseMatrix*>(x2_.get())) {
-    x2_->copyFrom(*inputV);
-    (dynamic_cast<CpuSparseMatrix*>(x2_.get()))->square2();
+  inputSquare_ = inputV->clone(0, 0, useGpu_);
+  if (dynamic_cast<CpuSparseMatrix*>(inputSquare_.get())) {
+    inputSquare_->copyFrom(*inputV);
+    (dynamic_cast<CpuSparseMatrix*>(inputSquare_.get()))->square2();
   } else {
-    inputV->square2(*x2_);
+    inputV->square2(*inputSquare_);
   }
-  latentVectors_->getW()->square2(*v2_);
-  tmpOut_->mul(*x2_, *v2_);
+  latentVectors_->getW()->square2(*latentVectorsSquare_);
+  tmpOut_->mul(*inputSquare_, *latentVectorsSquare_);
   outV->sumRows(*tmpOut_, -0.5, 1.0);

   /* activation */ {
-    REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
+    REGISTER_TIMER_INFO("FmAtvTimer", getName().c_str());
     forwardActivation();
   }
 }

 void FactorizationMachineLayer::backward(const UpdateCallback& callback) {
-  /* Do derivation */ {
-    REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
-    backwardActivation();
-  }
+  /* Do derivation */ { backwardActivation(); }

   const MatrixPtr& inputV = getInputValue(0);
   const MatrixPtr& oGrad = getOutputGrad();

-  MatrixPtr tmpSum =
-      Matrix::create(1, latentVectors_->getW()->getHeight(), false, useGpu_);
-  MatrixPtr tmpSum_T = Matrix::create(tmpSum->getRowBuf(0),
-                                      latentVectors_->getW()->getHeight(),
-                                      1,
-                                      false,
-                                      useGpu_);
+  Matrix::resizeOrCreate(
+      tmpSum_, 1, latentVectors_->getW()->getHeight(), false, useGpu_);
+  MatrixPtr tmpSumTrans = Matrix::create(tmpSum_->getRowBuf(0),
+                                         latentVectors_->getW()->getHeight(),
+                                         1,
+                                         false,
+                                         useGpu_);

   /* Calculate the gradients of the latentVectors_ matrix */
   if (latentVectors_->getWGrad()) {
-    MatrixPtr tmpIn = inputV->clone(0, 0, useGpu_);
+    MatrixPtr tmpInput = inputV->clone(0, 0, useGpu_);
     if (dynamic_cast<CpuSparseMatrix*>(inputV.get())) {
-      CpuSparseMatrix* inputV_s = dynamic_cast<CpuSparseMatrix*>(inputV.get());
-      CpuSparseMatrix* x2_s = dynamic_cast<CpuSparseMatrix*>(x2_.get());
-      CpuSparseMatrix* tmpIn_s = dynamic_cast<CpuSparseMatrix*>(tmpIn.get());
-      tmpIn_s->copyFrom(*inputV_s);
-      tmpIn_s->rowScale(0, *inputV_s, *oGrad);
-      latentVectors_->getWGrad()->mul(*tmpIn_s->getTranspose(), *tmpMul_, 1, 1);
-      tmpIn_s->rowScale(0, *x2_s, *oGrad);
+      CpuSparseMatrix* sparseInputV =
+          dynamic_cast<CpuSparseMatrix*>(inputV.get());
+      CpuSparseMatrix* sparseInputSquare =
+          dynamic_cast<CpuSparseMatrix*>(inputSquare_.get());
+      CpuSparseMatrix* sparseTmpInput =
+          dynamic_cast<CpuSparseMatrix*>(tmpInput.get());
+      sparseTmpInput->copyFrom(*sparseInputV);
+      sparseTmpInput->rowScale(0, *sparseInputV, *oGrad);
+      latentVectors_->getWGrad()->mul(
+          *sparseTmpInput->getTranspose(), *inputMulFactor_, 1, 1);
+      sparseTmpInput->rowScale(0, *sparseInputSquare, *oGrad);

-      MatrixPtr ones = Matrix::create(1, inputV->getHeight(), false, useGpu_);
-      ones->zeroMem();
-      ones->add(-1);
-      tmpSum->mul(*ones, *tmpIn_s, 1, 0);
+      Matrix::resizeOrCreate(negOnes_, 1, inputV->getHeight(), false, useGpu_);
+      negOnes_->zeroMem();
+      negOnes_->add(-1);
+      tmpSum_->mul(*negOnes_, *sparseTmpInput, 1, 0);
     } else {
-      tmpIn->rowScale(0, *inputV, *oGrad);
-      latentVectors_->getWGrad()->mul(*tmpIn->getTranspose(), *tmpMul_, 1, 1);
-      tmpIn->rowScale(0, *x2_, *oGrad);
+      tmpInput->rowScale(0, *inputV, *oGrad);
+      latentVectors_->getWGrad()->mul(
+          *tmpInput->getTranspose(), *inputMulFactor_, 1, 1);
+      tmpInput->rowScale(0, *inputSquare_, *oGrad);

-      tmpSum->sumCols(*tmpIn, -1, 0);
+      tmpSum_->sumCols(*tmpInput, -1, 0);
     }

     latentVectors_->getWGrad()->addRowScale(
-        0, *latentVectors_->getW(), *tmpSum_T);
+        0, *latentVectors_->getW(), *tmpSumTrans);

     /* Increasing the number of gradient */
     latentVectors_->getParameterPtr()->incUpdate(callback);

@@ -129,10 +133,10 @@ void FactorizationMachineLayer::backward(const UpdateCallback& callback) {
   /* Calculate the input layers gradient */
   MatrixPtr inGrad = getInputGrad(0);
   if (inGrad != NULL) {
-    MatrixPtr latentVectors_T = latentVectors_->getW()->getTranspose();
-    inGrad->mul(*tmpMul_, *latentVectors_T, 1, 1);
-    tmpSum_T->sumRows(*v2_, -1, 0);
-    inGrad->addColScale(0, *inputV, *tmpSum);
+    inGrad->mul(
+        *inputMulFactor_, *latentVectors_->getW()->getTranspose(), 1, 1);
+    tmpSumTrans->sumRows(*latentVectorsSquare_, -1, 0);
+    inGrad->addColScale(0, *inputV, *tmpSum_);
     inGrad->rowScale(0, *inGrad, *oGrad);
   }
 }
......
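For readers following the forward pass above: it evaluates the pairwise-interaction term through the usual O(k·n) reformulation rather than the naive double sum over feature pairs. Below is a minimal NumPy sketch (not part of the commit; the variable names only mirror inputMulFactor_, inputSquare_ and latentVectorsSquare_) that cross-checks the two forms:

import numpy as np

def fm_interaction_forward(X, V):
    # Pairwise-interaction term of a factorization machine, computed the way
    # the forward pass above does: 0.5 * sumRows((X V)^2) - 0.5 * sumRows(X^2 V^2).
    input_mul_factor = X.dot(V)                       # inputMulFactor_: (batch, k)
    first = 0.5 * (input_mul_factor ** 2).sum(axis=1)
    second = 0.5 * (X ** 2).dot(V ** 2).sum(axis=1)   # inputSquare_ * latentVectorsSquare_
    return first - second                             # shape: (batch,)

# Cross-check against the naive O(n^2 * k) double sum on random data.
rng = np.random.RandomState(0)
X = rng.randn(4, 6)   # batch of 4 inputs, n = 6 features
V = rng.randn(6, 3)   # latent vectors, k = 3
naive = np.array([
    sum(V[i].dot(V[j]) * x[i] * x[j]
        for i in range(6) for j in range(i + 1, 6))
    for x in X
])
assert np.allclose(fm_interaction_forward(X, V), naive)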
@@ -34,27 +34,36 @@ namespace paddle {
  * y = \sum_{i=1}^{n-1}\sum_{j=i+1}^n\langle v_i, v_j \rangle x_i x_j
  * \f]
  *
+ * The detailed calculation for forward and backward can be found in this
+ * paper:
+ *
+ * Rendle, Steffen. Factorization machines. IEEE 10th International
+ * Conference on Data Mining (ICDM). IEEE, 2010.
+ *
  * The config file api is factorization_machine.
  */
 class FactorizationMachineLayer : public Layer {
 protected:
-  /// The latent vectors, shape: (size, factorSize_)
-  /// Each row of the latentVectors_ matrix is the latent vector
-  /// corresponding to one input feature dimension
+  // The latent vectors, shape: (size, factorSize_)
+  // Each row of the latentVectors_ matrix is the latent vector
+  // corresponding to one input feature dimension
   std::unique_ptr<Weight> latentVectors_;
-  /// The hyperparameter that defines the dimensionality of the factorization
+  // The hyperparameter that defines the dimensionality of the factorization
   size_t factorSize_;

 private:
-  /// The result of input matrix * latent vector matrix that will be used in
-  /// both forward and backward step
-  MatrixPtr tmpMul_;
-  MatrixPtr tmpOut_;
-  /// Store the square values of the latent vectors matrix
-  MatrixPtr v2_;
-  /// Store the square values of input matrix
-  MatrixPtr x2_;
+  // Store the square values of the latent vectors matrix
+  MatrixPtr latentVectorsSquare_;
+  // Store the square values of the input matrix
+  MatrixPtr inputSquare_;
+  // The result of input matrix * latent vector matrix that will be used in
+  // both the forward and backward steps
+  MatrixPtr inputMulFactor_;
+  // Temporary calculation result store
+  MatrixPtr tmpOut_;
+  MatrixPtr tmpSum_;
+  // A row vector of -1s used to compute negative column sums over a sparse input
+  MatrixPtr negOnes_;

 public:
   explicit FactorizationMachineLayer(const LayerConfig& config)
......
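The class comment above only states the pairwise form of the interaction term. For reference, the O(k·n) identity from the cited Rendle (2010) paper, which the forward and backward passes implement (same notation as the comment), is:

\sum_{i=1}^{n-1}\sum_{j=i+1}^{n}\langle v_i, v_j\rangle x_i x_j
    = \frac{1}{2}\sum_{f=1}^{k}\left[\Big(\sum_{i=1}^{n} v_{i,f}\,x_i\Big)^{2}
      - \sum_{i=1}^{n} v_{i,f}^{2}\,x_i^{2}\right],
\qquad
\frac{\partial y}{\partial v_{i,f}}
    = x_i\sum_{j=1}^{n} v_{j,f}\,x_j - v_{i,f}\,x_i^{2}.

In terms of the members declared above, the first bracketed term is inputMulFactor_ squared and the second is inputSquare_ multiplied by latentVectorsSquare_; the two terms of the gradient are what backward() accumulates into latentVectors_->getWGrad() through the mul and addRowScale calls.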
@@ -2442,6 +2442,7 @@ void testFactorizationMachineLayer(InputType type, bool useGpu) {
 TEST(Layer, FactorizationMachineLayer) {
   for (auto useGpu : {false, true}) {
     testFactorizationMachineLayer(INPUT_DATA, useGpu);
+    testFactorizationMachineLayer(INPUT_SPARSE_FLOAT_VALUE_DATA, useGpu);
   }
 }
......
@@ -262,15 +262,15 @@ void CpuSparseMatrix::printOneRow(std::ostream& os, size_t idx) const {
 void CpuSparseMatrix::rowScale(size_t cCol, CpuSparseMatrix& b, Matrix& c) {
   CHECK(getFormat() != SPARSE_CSC) << "Not supported";
-  CHECK(height_ == b.getHeight());
-  CHECK(width_ == b.getWidth());
+  CHECK_EQ(height_, b.getHeight());
+  CHECK_EQ(width_, b.getWidth());
   real* A = getValue();
   real* B = b.getValue();
   for (size_t i = 0; i < height_; i++) {
     size_t start = getRowStartIdx(i);
     size_t end = getRowStartIdx(i + 1);
-    CHECK(start == b.getRowStartIdx(i));
-    CHECK(end == b.getRowStartIdx(i + 1));
+    CHECK_EQ(start, b.getRowStartIdx(i));
+    CHECK_EQ(end, b.getRowStartIdx(i + 1));
     for (size_t j = start; j < end; j++) {
       A[j] = B[j] * c.getElement(i, cCol);
     }
......
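For context on the CHECK changes above: rowScale(cCol, b, c) requires this matrix and b to share the same CSR structure, and overwrites each stored value with the matching value of b scaled by the row's entry in column cCol of the dense matrix c. A rough dense NumPy equivalent (purely illustrative, ignoring the sparse storage details):

import numpy as np

def row_scale(c_col, b, c):
    # Dense sketch of CpuSparseMatrix::rowScale:
    # result[i, :] = b[i, :] * c[i, c_col]; the real method only writes
    # the stored non-zero entries of each CSR row.
    return b * c[:, c_col][:, np.newaxis]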
@@ -7161,16 +7161,26 @@ def factorization_machine(input,
     The Factorization Machine models pairwise feature interactions as inner
     product of the learned latent vectors corresponding to each input feature.
     The Factorization Machine can effectively capture feature interactions
-    especially when the input is sparse. In practice, usually order 2 feature
-    interactions are considered using Factorization Machine with the formula:
+    especially when the input is sparse.
+
+    This implementation only considers the 2-order feature interactions using
+    Factorization Machine with the formula:

     .. math::
         y = \sum_{i=1}^{n-1}\sum_{j=i+1}^n\langle v_i, v_j \rangle x_i x_j

     Note:
         X is the input vector with size n. V is the factor matrix. Each row of V
         is the latent vector corresponding to each input dimension. The size of
         each latent vector is k.

+    For details of Factorization Machine, please refer to the paper:
+    Rendle, Steffen. Factorization machines. IEEE 10th International
+    Conference on Data Mining (ICDM). IEEE, 2010.
+
     .. code-block:: python

        factor_machine = factorization_machine(input=input_layer, factor_size=10)

     :param input: The input layer.
     :type input: LayerOutput
     :param factor_size: The hyperparameter that defines the dimensionality of
......
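As a slightly fuller usage sketch of the config API described above (only factorization_machine(input=..., factor_size=...) is taken from the docstring; the data layer name and input size are hypothetical):

from paddle.trainer_config_helpers import *

# Hypothetical n = 1024 dimensional (possibly sparse) feature input.
features = data_layer(name='features', size=1024)
# Second-order feature interactions with latent vectors of size k = 10.
fm = factorization_machine(input=features, factor_size=10)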