Commit 947b6a77, authored by wangmeng28

Implement factorization machine layer

Parent: f504c8a8
@@ -33,7 +33,10 @@ bool FactorizationMachineLayer::init(const LayerMap& layerMap,
   /* initialize the latentVectors_ */
   CHECK_EQ(inputLayers_.size(), 1UL);
   size_t height = inputLayers_[0]->getSize();
-  latentVectors_.reset(new Weight(height, factorSize_, parameters_[0]));
+  latentVectors_ =
+      std::unique_ptr<Weight>(new Weight(height, factorSize_, parameters_[0]));
+
+  v2_ = latentVectors_->getW()->clone(0, 0, useGpu_);
 
   return true;
 }
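A note on init() above: v2_ is pre-allocated via clone(0, 0, useGpu_), presumably as a buffer matching the latent-vector matrix's shape; forward() later fills it with the element-wise squares of that matrix so the X²·V² term can reuse the buffer on every pass.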
@@ -41,14 +44,28 @@ bool FactorizationMachineLayer::init(const LayerMap& layerMap,
 void FactorizationMachineLayer::forward(PassType passType) {
   Layer::forward(passType);
 
-  auto input = getInput(0);
-  int batchSize = input.getBatchSize();
-  int size = getSize();
+  const MatrixPtr& inputV = getInputValue(0);
+
+  size_t batchSize = inputV->getHeight();
+  size_t size = getSize();
   reserveOutput(batchSize, size);
 
   MatrixPtr outV = getOutputValue();
 
+  Matrix::resizeOrCreate(tmpMul_, batchSize, factorSize_, false, useGpu_);
+  Matrix::resizeOrCreate(tmpOut_, batchSize, factorSize_, false, useGpu_);
+
+  REGISTER_TIMER_INFO("FwMulTimer", getName().c_str());
+  tmpMul_->mul(*inputV, *latentVectors_->getW());
+  tmpOut_->pow2(*tmpMul_, 2);
+  outV->sumRows(*tmpOut_, 0.5, 0);
+
+  x2_ = inputV->clone(0, 0, useGpu_);
+  x2_->pow2(*inputV, 2);
+  v2_->pow2(*latentVectors_->getW(), 2);
+  tmpOut_->mul(*x2_, *v2_);
+  outV->sumRows(*tmpOut_, -0.5, 1.0);
+
   /* activation */ {
     REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
     forwardActivation();
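A note on the forward pass, for review: this layer computes only the second-order interaction term of a factorization machine, using the usual reformulation that avoids the explicit pairwise sum. In the notation below (introduced here, not in the patch), x is one input row with n features, V is the latent-vector matrix of shape (n, k), and k is factor_size:

$$
y = \frac{1}{2}\sum_{f=1}^{k}\Big[\Big(\sum_{i=1}^{n} v_{i,f}\,x_i\Big)^{2} - \sum_{i=1}^{n} v_{i,f}^{2}\,x_i^{2}\Big]
$$

Mapping this onto the code: tmpMul_ holds X·V for the whole batch, tmpOut_ holds its element-wise square (and afterwards X²·V²), and the two sumRows calls with coefficients 0.5 and -0.5 accumulate the bracketed difference into the one-column output, reading sumRows(m, a, b) as out = a·rowSum(m) + b·out.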
@@ -60,6 +77,43 @@ void FactorizationMachineLayer::backward(const UpdateCallback& callback) {
     REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
     backwardActivation();
   }
+
+  const MatrixPtr& inputV = getInputValue(0);
+  const MatrixPtr& oGrad = getOutputGrad();
+
+  MatrixPtr tmpSum =
+      Matrix::create(1, latentVectors_->getW()->getHeight(), false, useGpu_);
+  MatrixPtr tmpSum_T = Matrix::create(tmpSum->getRowBuf(0),
+                                      latentVectors_->getW()->getHeight(),
+                                      1,
+                                      false,
+                                      useGpu_);
+
+  /* Calculate the gradients of the latentVectors_ matrix */
+  if (latentVectors_->getWGrad()) {
+    MatrixPtr tmpIn = inputV->clone(0, 0, useGpu_);
+    tmpIn->rowScale(0, *inputV, *oGrad);
+    latentVectors_->getWGrad()->mul(*tmpIn->getTranspose(), *tmpMul_, 1, 1);
+
+    tmpIn->rowScale(0, *x2_, *oGrad);
+    tmpSum->sumCols(*tmpIn, -1, 0);
+    latentVectors_->getWGrad()->addRowScale(
+        0, *latentVectors_->getW(), *tmpSum_T);
+
+    /* Increasing the number of gradient */
+    latentVectors_->getParameterPtr()->incUpdate(callback);
+  }
+
+  /* Calculate the input layers gradient */
+  MatrixPtr inGrad = getInputGrad(0);
+  if (inGrad != NULL) {
+    MatrixPtr latentVectors_T = latentVectors_->getW()->getTranspose();
+    inGrad->mul(*tmpMul_, *latentVectors_T, 1, 1);
+    tmpSum_T->sumRows(*v2_, -1, 0);
+    inGrad->addColScale(0, *inputV, *tmpSum);
+    inGrad->rowScale(0, *inGrad, *oGrad);
+  }
 }
 
 }  // namespace paddle
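A note on the backward pass: differentiating the interaction term above gives, per sample (each term scaled by the incoming output gradient in the code),

$$
\frac{\partial y}{\partial v_{i,f}} = x_i\sum_{j} v_{j,f}\,x_j - v_{i,f}\,x_i^{2},
\qquad
\frac{\partial y}{\partial x_i} = \sum_{f} v_{i,f}\sum_{j} v_{j,f}\,x_j - x_i\sum_{f} v_{i,f}^{2}.
$$

The cached tmpMul_ = X·V from the forward pass supplies the Σ_j v_{j,f} x_j factor in both expressions. Reading sumCols/sumRows and addRowScale/addColScale as scale-and-accumulate helpers (as in the forward pass), tmpSum first collects the negated, gradient-weighted column sums of X² for the weight gradient, and is then reused through tmpSum_T for the negated per-row sums of V² in the input gradient.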
@@ -40,10 +40,22 @@ namespace paddle {
 class FactorizationMachineLayer : public Layer {
 protected:
   /// The latent vectors, shape: (size, factorSize_)
+  /// Each row of the latentVectors_ matrix is the latent vector
+  /// corresponding to one input feature dimension
   std::unique_ptr<Weight> latentVectors_;
   /// The hyperparameter that defines the dimensionality of the factorization
   size_t factorSize_;
 
+private:
+  /// The result of input matrix * latent vector matrix that will be used in
+  /// both the forward and backward steps
+  MatrixPtr tmpMul_;
+  MatrixPtr tmpOut_;
+  /// Store the square values of the latent vectors matrix
+  MatrixPtr v2_;
+  /// Store the square values of the input matrix
+  MatrixPtr x2_;
+
 public:
   explicit FactorizationMachineLayer(const LayerConfig& config)
       : Layer(config) {}
...
@@ -2363,8 +2363,9 @@ void testFactorizationMachineLayer(InputType type, bool useGpu) {
   TestConfig config;
   config.layerConfig.set_type("factorization_machine");
   config.layerConfig.set_factor_size(FACTOR_SIZE);
-  config.biasSize = 1;
-  config.inputDefs.push_back({type, "layer_0", 8192, 0});
+  config.layerConfig.set_size(1);
+  config.biasSize = 0;
+  config.inputDefs.push_back({type, "layer_0", 1024, 10240});
   config.layerConfig.add_inputs();
 
   testLayerGrad(config, "factorization_machine", 16, false, useGpu, false);
 }
...
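A note on the test change: the input shrinks from an 8192-dimensional layer with no parameter to a 1024-dimensional layer with a 10240-element parameter, the layer size is pinned to 1, and the bias is dropped (biasSize = 0). If the parameter stores the latent-vector matrix of shape (input size, FACTOR_SIZE), then 10240 = 1024 × 10, consistent with FACTOR_SIZE being 10 in this test.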