Commit ece32910 authored by Yibing Liu

refine rank_loss_op

Parent f2cfa324
@@ -28,18 +28,21 @@ class RankLossOp : public framework::OperatorWithKernel {
  protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
     // input check
-    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("P"), "Input(P) shouldn't be null");
-    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Oi"), "Input(Oi) shouldn't be null");
-    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Oj"), "Input(Oj) shouldn't be null");
-    auto p_dims = ctx.Input<framework::Tensor>("P")->dims();
-    auto oi_dims = ctx.Input<framework::Tensor>("Oi")->dims();
-    auto oj_dims = ctx.Input<framework::Tensor>("Oj")->dims();
-    PADDLE_ENFORCE_EQ(oi_dims, oj_dims,
-                      "Input(Oi) and Input(Oj) must have the same size");
-    PADDLE_ENFORCE_EQ(
-        p_dims, oi_dims,
-        "Input(P) must have the same size with Input(Oi) & Input(Oj)");
-    ctx.Output<framework::Tensor>("Out")->Resize(p_dims);
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Label"),
+                            "Input(Label) shouldn't be null");
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Left"),
+                            "Input(Left) shouldn't be null");
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Right"),
+                            "Input(Right) shouldn't be null");
+    auto label_dims = ctx.Input<framework::Tensor>("Label")->dims();
+    auto left_dims = ctx.Input<framework::Tensor>("Left")->dims();
+    auto right_dims = ctx.Input<framework::Tensor>("Right")->dims();
+    PADDLE_ENFORCE((label_dims.size() == 1) && (left_dims.size() == 1) &&
+                       (right_dims.size() == 1),
+                   "The rank of all inputs must be 1.");
+    PADDLE_ENFORCE((label_dims == left_dims) && (left_dims == right_dims),
+                   "All inputs must have the same size");
+    ctx.Output<framework::LoDTensor>("Out")->Resize(label_dims);
   }
 };
@@ -48,14 +51,23 @@ class RankLossOpMaker : public framework::OpProtoAndCheckerMaker {
   RankLossOpMaker(framework::OpProto *proto,
                   framework::OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("P", "The desired target values for posteriors.");
-    AddInput("Oi", "The model output for item i.");
-    AddInput("Oj", "The model output for item j.");
-    AddOutput("Out", "The output tensor of RankLoss operator.");
+    AddInput("Label",
+             "The label indicating whether A is ranked higher than B, 1-D tensor.");
+    AddInput("Left", "The output of RankNet for doc A, 1-D tensor.");
+    AddInput("Right", "The output of RankNet for doc B, 1-D tensor.");
+    AddOutput("Out", "The output loss of RankLoss operator, 1-D tensor.");
     AddComment(R"DOC(RankLoss operator

-A rank loss operator for learning to rank (LTR) task. This operator contains
-three inputs: P, Oi, and Oj, and the rank cost can be expressed as
+Rank loss operator for RankNet [1]. RankNet is a pairwise ranking model; one
+training sample consists of a pair of docs A and B, together with a label P
+indicating whether A is ranked higher than B:
+
+P = {0, 1} or {0, 0.5, 1}, where 0.5 means there is no information about the
+rank of the input pair.
+
+The RankLoss operator takes three inputs: Left (o_i), Right (o_j) and Label
+(P_{i,j}), which are the RankNet outputs for the two docs and the label
+respectively, and yields the rank loss C_{i,j} by the following expression

 \f[
   C_{i,j} = -\tilde{P_{ij}} * o_{i,j} + log(1 + e^{o_{i,j}}) \\
@@ -63,10 +75,11 @@ three inputs: P, Oi, and Oj, and the rank cost can be expressed as
   \tilde{P_{i,j}} = \left \{0, 0.5, 1 \right \} \ or \ \left \{0, 1 \right \}
 \f]

-A detailed explanation about these notations can be found in
+The operator can take inputs of one sample or in batch.
+
 [1]. Chris Burges, Tal Shaked, Erin Renshaw, et al. Learning to
-     Rank useing Gradient Descent.
+     Rank using Gradient Descent.
+     http://icml.cc/2015/wp-content/uploads/2015/06/icml_ranking.pdf
 )DOC");
   }
 };
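As a quick sanity check of the formula in the DOC comment above (illustrative only, not part of this commit), a minimal NumPy sketch evaluating C_{i,j} for the three possible label values:

```python
import numpy as np

def rank_loss(label, left, right):
    """C = -P * o + log(1 + exp(o)), with o = left - right."""
    o = left - right
    return np.log(1.0 + np.exp(o)) - label * o

left, right = 2.0, 1.0  # RankNet scores for docs A and B
for p in (0.0, 0.5, 1.0):
    print(p, rank_loss(p, left, right))
# With left > right, p = 1.0 (A ranked higher) gives the smallest loss,
# p = 0.0 the largest, and p = 0.5 (no rank information) sits in between.
```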
@@ -81,15 +94,25 @@ class RankLossGradOp : public framework::OperatorWithKernel {
  protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
-    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("P"), "Input(P) shouldn't be null.");
-    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Oi"), "Input(Oi) shouldn't be null.");
-    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Oj"), "Input(Oj) shouldn't be null.");
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Label"),
+                            "Input(Label) shouldn't be null.");
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Left"),
+                            "Input(Left) shouldn't be null.");
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Right"),
+                            "Input(Right) shouldn't be null.");
     PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
                             "Input(Out@GRAD) shouldn't be null.");
-    auto dims = ctx.Input<framework::Tensor>("P")->dims();
-    ctx.Output<framework::Tensor>(framework::GradVarName("P"))->Resize(dims);
-    ctx.Output<framework::Tensor>(framework::GradVarName("Oi"))->Resize(dims);
-    ctx.Output<framework::Tensor>(framework::GradVarName("Oj"))->Resize(dims);
+    auto dims = ctx.Input<framework::Tensor>("Left")->dims();
+    auto *left_grad =
+        ctx.Output<framework::LoDTensor>(framework::GradVarName("Left"));
+    auto *right_grad =
+        ctx.Output<framework::LoDTensor>(framework::GradVarName("Right"));
+    if (left_grad) {
+      left_grad->Resize(dims);
+    }
+    if (right_grad) {
+      right_grad->Resize(dims);
+    }
   }
 };
@@ -24,25 +24,20 @@ template <typename Place, typename T>
 class RankLossKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& ctx) const {
-    auto* out = ctx.Output<framework::Tensor>("Out");
-    auto* p_t = ctx.Input<framework::Tensor>("P");
-    auto* oi_t = ctx.Input<framework::Tensor>("Oi");
-    auto* oj_t = ctx.Input<framework::Tensor>("Oj");
-    out->mutable_data<T>(ctx.GetPlace());
+    auto* out_t = ctx.Output<framework::LoDTensor>("Out");
+    auto* label_t = ctx.Input<framework::Tensor>("Label");
+    auto* left_t = ctx.Input<framework::Tensor>("Left");
+    auto* right_t = ctx.Input<framework::Tensor>("Right");
+    out_t->mutable_data<T>(ctx.GetPlace());

-    auto& dev = ctx.GetEigenDevice<Place>();
-    auto out_eig = framework::EigenVector<T>::Flatten(*out);
-    auto p_eig = framework::EigenVector<T>::Flatten(*p_t);
-    auto oi_eig = framework::EigenVector<T>::Flatten(*oi_t);
-    auto oj_eig = framework::EigenVector<T>::Flatten(*oj_t);
+    auto out = framework::EigenVector<T>::Flatten(*out_t);
+    auto label = framework::EigenVector<T>::Flatten(*label_t);
+    auto left = framework::EigenVector<T>::Flatten(*left_t);
+    auto right = framework::EigenVector<T>::Flatten(*right_t);

-    framework::Tensor o_t;
-    o_t.Resize(oi_t->dims());
-    o_t.mutable_data<T>(ctx.GetPlace());
-    auto o_eig = framework::EigenVector<T>::Flatten(o_t);
-    o_eig.device(dev) = oi_eig - oj_eig;
-
-    out_eig.device(dev) = (1. + (o_eig).exp()).log() - p_eig * o_eig;
+    auto& dev = ctx.GetEigenDevice<Place>();
+    out.device(dev) =
+        (1. + (left - right).exp()).log() - label * (left - right);
   }
 };
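For reference (not part of the commit), the refined forward kernel drops the intermediate tensor o_t and fuses everything into a single Eigen expression. A small NumPy sketch showing that the two-step and fused formulations agree:

```python
import numpy as np

label = np.array([1.0, 0.0, 0.5], dtype=np.float32)
left = np.random.random(3).astype(np.float32)
right = np.random.random(3).astype(np.float32)

# old style: materialize o = left - right in a temporary first
o = left - right
out_two_step = np.log(1.0 + np.exp(o)) - label * o

# refined style: one fused expression, as in the new kernel
out_fused = np.log(1.0 + np.exp(left - right)) - label * (left - right)

assert np.allclose(out_two_step, out_fused)
```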
@@ -50,40 +45,35 @@ template <typename Place, typename T>
 class RankLossGradKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& ctx) const {
-    auto* d_oi = ctx.Output<framework::Tensor>(framework::GradVarName("Oi"));
-    auto* d_oj = ctx.Output<framework::Tensor>(framework::GradVarName("Oj"));
-    auto* d_p = ctx.Output<framework::Tensor>(framework::GradVarName("P"));
+    auto* d_left_t =
+        ctx.Output<framework::LoDTensor>(framework::GradVarName("Left"));
+    auto* d_right_t =
+        ctx.Output<framework::LoDTensor>(framework::GradVarName("Right"));

-    auto* d_out = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
-    auto* p_t = ctx.Input<framework::Tensor>("P");
-    auto* oi_t = ctx.Input<framework::Tensor>("Oi");
-    auto* oj_t = ctx.Input<framework::Tensor>("Oj");
-
-    d_oi->mutable_data<T>(ctx.GetPlace());
-    d_oj->mutable_data<T>(ctx.GetPlace());
-    d_p->mutable_data<T>(ctx.GetPlace());
+    auto* d_out_t = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
+    auto* label_t = ctx.Input<framework::Tensor>("Label");
+    auto* left_t = ctx.Input<framework::Tensor>("Left");
+    auto* right_t = ctx.Input<framework::Tensor>("Right");

     auto& dev = ctx.GetEigenDevice<Place>();
-    auto d_out_eig = framework::EigenVector<T>::Flatten(*d_out);
-    auto p_eig = framework::EigenVector<T>::Flatten(*p_t);
-    auto oi_eig = framework::EigenVector<T>::Flatten(*oi_t);
-    auto oj_eig = framework::EigenVector<T>::Flatten(*oj_t);
-
-    auto d_oi_eig = framework::EigenVector<T>::Flatten(*d_oi);
-    auto d_oj_eig = framework::EigenVector<T>::Flatten(*d_oj);
-
-    framework::Tensor o_t;
-    o_t.Resize(oi_t->dims());
-    o_t.mutable_data<T>(ctx.GetPlace());
-    auto o_eig = framework::EigenVector<T>::Flatten(o_t);
-    o_eig.device(dev) = oi_eig - oj_eig;
-
-    // dOi & dOj
-    d_oi_eig.device(dev) =
-        d_out_eig * (o_eig.exp() / (1. + o_eig.exp()) - p_eig);
-    d_oj_eig.device(dev) = -d_oi_eig;
-    // dP
-    framework::EigenVector<T>::Flatten(*d_p).device(dev) = -o_eig;
+    auto d_out = framework::EigenVector<T>::Flatten(*d_out_t);
+    auto label = framework::EigenVector<T>::Flatten(*label_t);
+    auto left = framework::EigenVector<T>::Flatten(*left_t);
+    auto right = framework::EigenVector<T>::Flatten(*right_t);
+
+    // compute d_left
+    if (d_left_t) {
+      d_left_t->mutable_data<T>(ctx.GetPlace());
+      auto d_left = framework::EigenVector<T>::Flatten(*d_left_t);
+      d_left.device(dev) = d_out * (1. / (1. + (right - left).exp()) - label);
+    }
+    // compute d_right
+    if (d_right_t) {
+      d_right_t->mutable_data<T>(ctx.GetPlace());
+      auto d_right = framework::EigenVector<T>::Flatten(*d_right_t);
+      d_right.device(dev) =
+          -d_out * (1.0 / (1. + (right - left).exp()) - label);
+    }
   }
 };
 }  // namespace operators
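A note on the gradient the grad kernel implements: with o = left - right, dC/d(left) = sigmoid(o) - label = 1 / (1 + exp(right - left)) - label, and dC/d(right) is its negation, each scaled by the incoming Out@GRAD. A minimal NumPy finite-difference check of these expressions (illustrative only, not part of the commit):

```python
import numpy as np

def loss(label, left, right):
    o = left - right
    return np.log(1.0 + np.exp(o)) - label * o

label, left, right, eps = 0.5, 0.3, 0.7, 1e-5

# analytic gradients, matching the expressions in RankLossGradKernel
d_left = 1.0 / (1.0 + np.exp(right - left)) - label
d_right = -d_left

# central finite-difference approximations
d_left_fd = (loss(label, left + eps, right) - loss(label, left - eps, right)) / (2 * eps)
d_right_fd = (loss(label, left, right + eps) - loss(label, left, right - eps)) / (2 * eps)

assert np.isclose(d_left, d_left_fd, atol=1e-4)
assert np.isclose(d_right, d_right_fd, atol=1e-4)
```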
@@ -3,24 +3,29 @@ import numpy as np
 from op_test import OpTest


-class TestReshapeOp(OpTest):
+class TestRankLossOp(OpTest):
     def setUp(self):
         self.op_type = "rank_loss"
-        num = 5
-        # P = {0, 1.0} or {0, 0.5, 1.0}
-        P = np.random.randint(0, 2, size=(num, num)).astype("float32")
-        Oi = np.random.random((num, num)).astype("float32")
-        Oj = np.random.random((num, num)).astype("float32")
-        O = Oi - Oj
-        Out = np.log(1.0 + np.exp(O)) - P * O
-        self.inputs = {'P': P, 'Oi': Oi, 'Oj': Oj}
-        self.outputs = {'Out': Out}
+        batch_size = 5
+        # labels_{i} = {0, 1.0} or {0, 0.5, 1.0}
+        label = np.random.randint(0, 2, size=(batch_size, )).astype("float32")
+        left = np.random.random((batch_size, )).astype("float32")
+        right = np.random.random((batch_size, )).astype("float32")
+        loss = np.log(1.0 + np.exp(left - right)) - label * (left - right)
+        self.inputs = {'Label': label, 'Left': left, 'Right': right}
+        self.outputs = {'Out': loss}

     def test_check_output(self):
         self.check_output()

     def test_check_grad(self):
-        self.check_grad(["Oj"], "Out")
+        self.check_grad(["Left", "Right"], "Out")
+
+    def test_check_grad_ignore_left(self):
+        self.check_grad(["Right"], "Out", no_grad_set=set('Left'))
+
+    def test_check_grad_ignore_right(self):
+        self.check_grad(["Left"], "Out", no_grad_set=set('Right'))


 if __name__ == '__main__':