diff --git a/paddle/gserver/layers/CosSimLayer.cpp b/paddle/gserver/layers/CosSimLayer.cpp
index 254120443dc3d41bf2422be2e88cb376d70c93d4..ac66fd4712af487cd83f1e0356d7a6a090f607f4 100644
--- a/paddle/gserver/layers/CosSimLayer.cpp
+++ b/paddle/gserver/layers/CosSimLayer.cpp
@@ -26,15 +26,23 @@ bool CosSimLayer::init(const LayerMap& layerMap,
   Layer::init(layerMap, parameterMap);
 
   CHECK_EQ(inputLayers_.size(), 2LU);
+
+  createFunction(forward_,
+                 "CosSimForward",
+                 FuncConfig().set("scale", (real)config_.cos_scale()));
+  createFunction(backward_,
+                 "CosSimBackward",
+                 FuncConfig().set("scale", (real)config_.cos_scale()));
+
   return true;
 }
 
 void CosSimLayer::forward(PassType passType) {
   Layer::forward(passType);
-
   /* malloc memory for the output_ if necessary */
   int batchSize = getInputValue(0)->getHeight();
   int size = getSize();
+  CHECK_EQ(forward_.size(), 1) << "Only one forward function needed";
 
   {
     REGISTER_TIMER_INFO("CosFwResetTimer", getName().c_str());
@@ -42,26 +50,42 @@ void CosSimLayer::forward(PassType passType) {
   }
 
   MatrixPtr outV = getOutputValue();
-
   /* activation */ {
     REGISTER_TIMER_INFO("CosFwAtvTimer", getName().c_str());
     MatrixPtr prevOut1 = getInputValue(0);
     MatrixPtr prevOut2 = getInputValue(1);
-    outV->cosSim(*prevOut1, *prevOut2, config_.cos_scale());
+
+    CHECK(outV && prevOut1 && prevOut2);
+    forward_[0]->calc(
+        {Tensor(prevOut1->getData(),
+                Dims{prevOut1->getHeight(), prevOut1->getWidth()}),
+         Tensor(prevOut2->getData(),
+                Dims{prevOut2->getHeight(), prevOut2->getWidth()})},
+        {Tensor(outV->getData(), Dims{outV->getHeight(), outV->getWidth()})},
+        {});
   }
 }
 
 void CosSimLayer::backward(const UpdateCallback& callback) {
   /* activation */ {
     REGISTER_TIMER_INFO("CosBpAtvTimer", getName().c_str());
-    MatrixPtr outG = this->getOutputGrad();
-
-    outG->cosSimDerivative(*this->getOutputValue(),
-                           *getInputValue(0),
-                           *getInputValue(1),
-                           *getInputGrad(0),
-                           *getInputGrad(1),
-                           config_.cos_scale());
+    CHECK_EQ(backward_.size(), 1) << "Only one backward function needed";
+
+    const auto outG = this->getOutputGrad();
+    const auto outV = this->getOutputValue();
+    const auto inV1 = this->getInputValue(0);
+    const auto inV2 = this->getInputValue(1);
+    auto inG1 = this->getInputGrad(0);
+    auto inG2 = this->getInputGrad(1);
+    CHECK(outG && outV && inV1 && inV2 && inG1 && inG2);
+    backward_[0]->calc(
+        {Tensor(outV->getData(), Dims{outV->getHeight(), outV->getWidth()}),
+         Tensor(inV1->getData(), Dims{inV1->getHeight(), inV1->getWidth()}),
+         Tensor(inV2->getData(), Dims{inV2->getHeight(), inV2->getWidth()}),
+         Tensor(inG1->getData(), Dims{inG1->getHeight(), inG1->getWidth()}),
+         Tensor(inG2->getData(), Dims{inG2->getHeight(), inG2->getWidth()})},
+        {Tensor(outG->getData(), Dims{outG->getHeight(), outG->getWidth()})},
+        {});
   }
 }
diff --git a/paddle/gserver/layers/CosSimLayer.h b/paddle/gserver/layers/CosSimLayer.h
index 65549626098f084c5e1786885e06c1bdfa3ba74c..8afaee62c2dcacba006846df0111fcbe8f7575e4 100644
--- a/paddle/gserver/layers/CosSimLayer.h
+++ b/paddle/gserver/layers/CosSimLayer.h
@@ -28,7 +28,7 @@ namespace paddle {
  *
  * - Input1: A vector (batchSize * dataDim) *
  * - Input2: A vector (batchSize * dataDim) or (1 * dataDim) *
- * - Output: A vector (dataDim * 1)
+ * - Output: A vector (batchSize * 1)
  *
  * The config file api is cos_sim.
  */
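For reference, the `CosSimForward`/`CosSimBackward` functions registered above take over the work previously done by `Matrix::cosSim` and `Matrix::cosSimDerivative`. Below is a minimal, dependency-free sketch of the per-row math those functions are expected to compute; the helper names are illustrative, not Paddle APIs, and zero-norm rows and the GPU path are not handled.

```cpp
#include <cassert>
#include <cmath>
#include <cstdio>
#include <vector>

// Reference forward: out = scale * (x . y) / (|x| * |y|) for one row pair.
float cosSimForwardRef(const std::vector<float>& x,
                       const std::vector<float>& y,
                       float scale) {
  assert(x.size() == y.size());
  float xy = 0.f, xx = 0.f, yy = 0.f;
  for (size_t i = 0; i < x.size(); ++i) {
    xy += x[i] * y[i];
    xx += x[i] * x[i];
    yy += y[i] * y[i];
  }
  return scale * xy / (std::sqrt(xx) * std::sqrt(yy));
}

// Reference backward: accumulate the gradients of out w.r.t. x and y,
// weighted by the upstream gradient outG (gradients are summed into
// xGrad/yGrad, not overwritten, matching how layer gradients accumulate).
void cosSimBackwardRef(float outG,
                       float out,
                       const std::vector<float>& x,
                       const std::vector<float>& y,
                       std::vector<float>& xGrad,
                       std::vector<float>& yGrad,
                       float scale) {
  float xx = 0.f, yy = 0.f;
  for (size_t i = 0; i < x.size(); ++i) {
    xx += x[i] * x[i];
    yy += y[i] * y[i];
  }
  const float xNorm = std::sqrt(xx);
  const float yNorm = std::sqrt(yy);
  for (size_t i = 0; i < x.size(); ++i) {
    // d(out)/d(x_i) = scale * y_i / (|x||y|) - out * x_i / |x|^2
    xGrad[i] += outG * (scale * y[i] / (xNorm * yNorm) - out * x[i] / xx);
    yGrad[i] += outG * (scale * x[i] / (xNorm * yNorm) - out * y[i] / yy);
  }
}

int main() {
  std::vector<float> x = {1.f, 2.f, 3.f}, y = {0.5f, -1.f, 2.f};
  std::vector<float> xGrad(3, 0.f), yGrad(3, 0.f);
  float out = cosSimForwardRef(x, y, /*scale=*/1.f);
  cosSimBackwardRef(/*outG=*/1.f, out, x, y, xGrad, yGrad, /*scale=*/1.f);
  std::printf("out=%f xGrad[0]=%f yGrad[0]=%f\n", out, xGrad[0], yGrad[0]);
  return 0;
}
```

A finite-difference check of `cosSimForwardRef` against `cosSimBackwardRef` is a quick way to validate the derivative formula.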
diff --git a/paddle/gserver/layers/CosSimVecMatLayer.cpp b/paddle/gserver/layers/CosSimVecMatLayer.cpp
index 5f652319e5620227fca166a8f72e5aed416bf5dd..427191e540778a9b68c5b2e2309a4a9b6b645734 100644
--- a/paddle/gserver/layers/CosSimVecMatLayer.cpp
+++ b/paddle/gserver/layers/CosSimVecMatLayer.cpp
@@ -12,8 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "Layer.h"
-#include "paddle/math/Matrix.h"
+#include "CosSimVecMatLayer.h"
 #include "paddle/utils/Logging.h"
 #include "paddle/utils/Stat.h"
 
@@ -98,11 +97,23 @@ bool CosSimVecMatLayer::init(const LayerMap& layerMap,
                            dataDim,
                            /* trans= */ false,
                            useGpu_);
+
+  /// todo(tianbing), do we really need to check these shared pointers?
+  CHECK(tmpRow0 && tmpRow1 && tmpRow2 && tmpRow3 && tmpMtx0 && tmpMtx1);
+
+  createFunction(forward_,
+                 "CosSimForward",
+                 FuncConfig().set("scale", (real)config_.cos_scale()));
+  createFunction(backward_,
+                 "CosSimBackward",
+                 FuncConfig().set("scale", (real)config_.cos_scale()));
+
   return true;
 }
 
 void CosSimVecMatLayer::forward(PassType passType) {
   Layer::forward(passType);
+  CHECK_EQ(forward_.size(), 1) << "Only one forward function needed";
 
   MatrixPtr inV0 = getInputValue(0);
   MatrixPtr inV1 = getInputValue(1);
@@ -118,17 +129,26 @@ void CosSimVecMatLayer::forward(PassType passType) {
   }
 
   MatrixPtr outV = getOutputValue();
-
+  CHECK(outV && inV0 && inV1);
   REGISTER_TIMER_INFO("FwCosVMTimer", getName().c_str());
   for (size_t i = 0; i < batchSize; i++) {
     tmpRow0->setData(inV0->rowBuf(i));
     tmpMtx0->setData(inV1->rowBuf(i));
     tmpRow2->setData(outV->rowBuf(i));
-    tmpRow2->cosSim(*(tmpMtx0), *(tmpRow0), config_.cos_scale());
+
+    forward_[0]->calc({Tensor(tmpMtx0->getData(),
+                              Dims{tmpMtx0->getHeight(), tmpMtx0->getWidth()}),
+                       Tensor(tmpRow0->getData(),
+                              Dims{tmpRow0->getHeight(), tmpRow0->getWidth()})},
+                      {Tensor(tmpRow2->getData(),
+                              Dims{tmpRow2->getHeight(), tmpRow2->getWidth()})},
+                      {});
   }
 }
 
 void CosSimVecMatLayer::backward(const UpdateCallback& callback) {
+  CHECK_EQ(backward_.size(), 1) << "Only one backward function needed";
+
   MatrixPtr inV0 = getInputValue(0);
   MatrixPtr inV1 = getInputValue(1);
   MatrixPtr inG0 = getInputGrad(0);
@@ -137,27 +157,31 @@ void CosSimVecMatLayer::backward(const UpdateCallback& callback) {
   MatrixPtr outG = getOutputGrad();
 
   size_t batchSize = inV0->getHeight();
-
+  CHECK(inV0 && inV1 && inG0 && inG1 && outV && outG);
   REGISTER_TIMER_INFO("BwCosVMTimer", getName().c_str());
 
-  if (inG0 && inG1) {
-    for (size_t i = 0; i < batchSize; i++) {
-      tmpRow0->setData(inV0->rowBuf(i));
-      tmpRow1->setData(inG0->rowBuf(i));
-      tmpMtx0->setData(inV1->rowBuf(i));
-      tmpMtx1->setData(inG1->rowBuf(i));
-      tmpRow2->setData(outV->rowBuf(i));
-      tmpRow3->setData(outG->rowBuf(i));
-
-      tmpRow3->cosSimDerivative(*(tmpRow2),
-                                *(tmpMtx0),
-                                *(tmpRow0),
-                                *(tmpMtx1),
-                                *(tmpRow1),
-                                config_.cos_scale());
-    }
-  } else {
-    CHECK(!inG0 || !inG1) << "Not supported";
+  for (size_t i = 0; i < batchSize; i++) {
+    tmpRow0->setData(inV0->rowBuf(i));
+    tmpRow1->setData(inG0->rowBuf(i));
+    tmpMtx0->setData(inV1->rowBuf(i));
+    tmpMtx1->setData(inG1->rowBuf(i));
+    tmpRow2->setData(outV->rowBuf(i));
+    tmpRow3->setData(outG->rowBuf(i));
+
+    backward_[0]->calc(
+        {Tensor(tmpRow2->getData(),
+                Dims{tmpRow2->getHeight(), tmpRow2->getWidth()}),
+         Tensor(tmpMtx0->getData(),
+                Dims{tmpMtx0->getHeight(), tmpMtx0->getWidth()}),
+         Tensor(tmpRow0->getData(),
+                Dims{tmpRow0->getHeight(), tmpRow0->getWidth()}),
+         Tensor(tmpMtx1->getData(),
+                Dims{tmpMtx1->getHeight(), tmpMtx1->getWidth()}),
+         Tensor(tmpRow1->getData(),
+                Dims{tmpRow1->getHeight(), tmpRow1->getWidth()})},
+        {Tensor(tmpRow3->getData(),
+                Dims{tmpRow3->getHeight(), tmpRow3->getWidth()})},
+        {});
   }
 }
diff --git a/paddle/gserver/layers/CosSimVecMatLayer.h b/paddle/gserver/layers/CosSimVecMatLayer.h
new file mode 100644
index 0000000000000000000000000000000000000000..bee83b58154b67587656c1dd533b7ee1585ff6a0
--- /dev/null
+++ b/paddle/gserver/layers/CosSimVecMatLayer.h
@@ -0,0 +1,52 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "Layer.h"
+#include "paddle/math/Matrix.h"
+
+namespace paddle {
+/**
+ * @brief A layer for computing cosine similarity between a vector
+ *        and each row of a matrix
+ *        out[i] = cos_scale * cos(in1, in2(i,:));
+ * @note used in NEURAL TURING MACHINE
+ *
+ * Input1: a vector (batchSize * dataDim)
+ *
+ * Input2: a matrix in vector form (batchSize * (weightDim*dataDim))
+ *
+ * Output: a vector (batchSize * weightDim)
+ */
+
+class CosSimVecMatLayer : public Layer {
+public:
+  explicit CosSimVecMatLayer(const LayerConfig& config) : Layer(config) {}
+
+  ~CosSimVecMatLayer() {}
+
+  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
+
+  void forward(PassType passType);
+  void backward(const UpdateCallback& callback = nullptr);
+
+protected:
+  MatrixPtr tmpMtx0;
+  MatrixPtr tmpMtx1;
+  MatrixPtr tmpRow0;
+  MatrixPtr tmpRow1;
+  MatrixPtr tmpRow2;
+  MatrixPtr tmpRow3;
+};
+
+}  // namespace paddle
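To make the vec-mat layer's contract concrete, here is a small standalone sketch (plain C++, no Paddle types) of the data layout that the per-sample loop feeds into `forward_[0]->calc`: each sample's Input2 row is viewed as a (weightDim x dataDim) matrix (`tmpMtx0` in the layer), and the sample's Input1 row (`tmpRow0`) is compared against every row of that matrix, giving weightDim scaled similarities (`tmpRow2`). The sizes and values are made up for illustration.

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// Scaled cosine similarity of two equal-length spans.
static float scaledCos(const float* a, const float* b, size_t n, float scale) {
  float ab = 0.f, aa = 0.f, bb = 0.f;
  for (size_t i = 0; i < n; ++i) {
    ab += a[i] * b[i];
    aa += a[i] * a[i];
    bb += b[i] * b[i];
  }
  return scale * ab / (std::sqrt(aa) * std::sqrt(bb));
}

int main() {
  const size_t batchSize = 2, dataDim = 3, weightDim = 2;
  const float scale = 5.0f;
  // Input1: one dataDim vector per sample.
  std::vector<float> in1 = {1, 0, 0,   // sample 0
                            0, 1, 0};  // sample 1
  // Input2: one flattened (weightDim x dataDim) matrix per sample.
  std::vector<float> in2 = {1, 0, 0, 0, 0, 1,   // sample 0
                            0, 2, 0, 1, 1, 0};  // sample 1
  std::vector<float> out(batchSize * weightDim, 0.f);

  for (size_t i = 0; i < batchSize; ++i) {
    const float* vec = &in1[i * dataDim];               // plays the role of tmpRow0
    const float* mat = &in2[i * weightDim * dataDim];   // plays the role of tmpMtx0
    for (size_t j = 0; j < weightDim; ++j) {
      // One output per matrix row: out[i][j] = scale * cos(in1[i], in2[i](j,:))
      out[i * weightDim + j] = scaledCos(mat + j * dataDim, vec, dataDim, scale);
    }
  }

  for (size_t i = 0; i < batchSize; ++i) {
    std::printf("sample %zu:", i);
    for (size_t j = 0; j < weightDim; ++j) {
      std::printf(" %.3f", out[i * weightDim + j]);
    }
    std::printf("\n");
  }
  return 0;
}
```

The backward pass walks the same per-row layout, with the matrix- and vector-shaped gradients (`tmpMtx1`, `tmpRow1`) passed alongside the values so the kernel can accumulate into them.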