diff --git a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc index 4b307140c5df26bd529ed19a14bfa09a96667cec..c8ee13875c5ae772de3c09f97fded8f70c5698e6 100644 --- a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc +++ b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc @@ -115,18 +115,22 @@ class TeacherStudentSigmoidLossOpMaker AddOutput("Y", "(Tensor, default Tensor), a 2-D tensor with shape " "[N x 1]. The teacher student sigmoid loss."); - AddAttr("soft_max_up_bound", "fp32, default 15.0").SetDefault(15.0); - AddAttr("soft_max_lower_bound", "fp32, default -15.0") + AddAttr( + "soft_max_up_bound", + "fp32, if input > soft_max_up_bound, will be bound, default 15.0") + .SetDefault(15.0); + AddAttr( + "soft_max_lower_bound", + "fp32, if input < soft_max_lower_bound, will be bound, default -15.0") .SetDefault(-15.0); AddComment(R"DOC( TeacherStudentSigmoidLoss Operator. -TeacherStudentSigmoidLoss Operator. It's similarity to SigmoidCrossEntropyWithLogits Operator. The difference is that we add another label(z') to original. loss = max(x, 0) - x * z + log(1 + exp(-abs(x))) + max(x, 0) - x * z' + log(1 + exp(-abs(x))) z is click or not - z' is value q of feed_fine + z' is teacher value label = {-2, -1, [0, 2]} when z' is not exist, clk = 0 : label = -2; when z' is not exist, clk = 1 : label = -1; @@ -137,104 +141,6 @@ we add another label(z') to original. } }; -// template -template -class TeacherStudentSigmoidLossOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - PADDLE_ENFORCE(platform::is_cpu_place(context.GetPlace()), - "This kernel only runs on CPU."); - - Tensor* y = context.Output("Y"); - const Tensor* x = context.Input("X"); - const Tensor* labels = context.Input("Label"); - T* y_data = y->mutable_data(context.GetPlace()); - const T* x_data = x->data(); - const T* label_data = labels->data(); - int64_t batch_size = x->dims()[0]; - // loss = max(x, 0) - x * z + log(1 + exp(-abs(x))) + max(x, 0) - x * z' + - // log(1 + exp(-abs(x))) - // z is click or not - // z' is value q of feed_fine - // label = {-2, -1, [0, 2]} - // when z' is not exist, clk = 0 : label = -2; - // when z' is not exist, clk = 1 : label = -1; - // when z' is exist , clk = 0 : label = 0 + z'; - // when z' is exist , clk = 1 : label = 1 + z'; - for (int i = 0; i < batch_size; ++i) { - if (label_data[i] < -1.0) { - y_data[i] = (x_data[i] > 0 ? x_data[i] : 0.0) + - log(1.0 + exp(-fabs(x_data[i]))); - } else if (label_data[i] < 0.0) { - y_data[i] = (x_data[i] > 0 ? x_data[i] : 0.0) - x_data[i] + - log(1.0 + exp(-fabs(x_data[i]))); - } else if (label_data[i] < 1.0) { - y_data[i] = (x_data[i] > 0 ? x_data[i] : 0.0) + - log(1.0 + exp(-fabs(x_data[i]))) + - (x_data[i] > 0 ? x_data[i] : 0.0) - - x_data[i] * label_data[i] + - log(1.0 + exp(-fabs(x_data[i]))); - } else { - y_data[i] = (x_data[i] > 0 ? x_data[i] : 0.0) - x_data[i] + - log(1.0 + exp(-fabs(x_data[i]))) + - (x_data[i] > 0 ? x_data[i] : 0.0) - - x_data[i] * (label_data[i] - 1.0) + - log(1.0 + exp(-fabs(x_data[i]))); - } - } - } -}; - -template -class TeacherStudentSigmoidLossGradOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - const Tensor* x = context.Input("X"); - const T* x_data = x->data(); - - Tensor* dx = context.Output(framework::GradVarName("X")); - T* dx_data = dx->mutable_data(context.GetPlace()); - - const Tensor* labels = context.Input("Label"); - const T* label_data = labels->data(); - - T soft_max_up_bound = - static_cast(context.Attr("soft_max_up_bound")); - T soft_max_lower_bound = - static_cast(context.Attr("soft_max_lower_bound")); - - int64_t batch_size = x->dims()[0]; - - const framework::Tensor* dOut = - context.Input(framework::GradVarName("Y")); - - const T* dout_data = dOut->data(); - - for (int i = 0; i < batch_size; ++i) { - T sum_val = x_data[i]; - if (sum_val > soft_max_up_bound) { - sum_val = soft_max_up_bound; - } else { - if (sum_val < soft_max_lower_bound) { - sum_val = soft_max_lower_bound; - } - } - - T pred = 1.0 / (1.0 + exp(-sum_val)); - if (label_data[i] < -1.0) { - dx_data[i] = 0.0 - pred; - } else if (label_data[i] < 0.0) { - dx_data[i] = 1.0 - pred; - } else { - dx_data[i] = label_data[i] - 2.0 * pred; - } - if (sum_val >= soft_max_up_bound || sum_val <= soft_max_lower_bound) { - dx_data[i] = 0; - } - dx_data[i] *= dout_data[i] * -1; - } - } -}; } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.h b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.h index f8e64c4d18b8a30a50c758ed5d1ffcc851dc064c..41d2662ae2a4d37222323d6a536ed3af1ab7e056 100644 --- a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.h +++ b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.h @@ -20,6 +20,99 @@ namespace paddle { namespace operators { using Tensor = framework::Tensor; +template +class TeacherStudentSigmoidLossOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + Tensor* y = context.Output("Y"); + const Tensor* x = context.Input("X"); + const Tensor* labels = context.Input("Label"); + T* y_data = y->mutable_data(context.GetPlace()); + const T* x_data = x->data(); + const T* label_data = labels->data(); + int64_t batch_size = x->dims()[0]; + // loss = max(x, 0) - x * z + log(1 + exp(-abs(x))) + max(x, 0) - x * z' + + // log(1 + exp(-abs(x))) + // z is click or not + // z' is value q of feed_fine + // label = {-2, -1, [0, 2]} + // when z' is not exist, clk = 0 : label = -2; + // when z' is not exist, clk = 1 : label = -1; + // when z' is exist , clk = 0 : label = 0 + z'; + // when z' is exist , clk = 1 : label = 1 + z'; + for (int i = 0; i < batch_size; ++i) { + if (label_data[i] < -1.0) { + y_data[i] = (x_data[i] > 0 ? x_data[i] : 0.0) + + log(1.0 + exp(-fabs(x_data[i]))); + } else if (label_data[i] < 0.0) { + y_data[i] = (x_data[i] > 0 ? x_data[i] : 0.0) - x_data[i] + + log(1.0 + exp(-fabs(x_data[i]))); + } else if (label_data[i] < 1.0) { + y_data[i] = (x_data[i] > 0 ? x_data[i] : 0.0) + + log(1.0 + exp(-fabs(x_data[i]))) + + (x_data[i] > 0 ? x_data[i] : 0.0) - + x_data[i] * label_data[i] + + log(1.0 + exp(-fabs(x_data[i]))); + } else { + y_data[i] = (x_data[i] > 0 ? x_data[i] : 0.0) - x_data[i] + + log(1.0 + exp(-fabs(x_data[i]))) + + (x_data[i] > 0 ? x_data[i] : 0.0) - + x_data[i] * (label_data[i] - 1.0) + + log(1.0 + exp(-fabs(x_data[i]))); + } + } + } +}; +template +class TeacherStudentSigmoidLossGradOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + const Tensor* x = context.Input("X"); + const T* x_data = x->data(); + + Tensor* dx = context.Output(framework::GradVarName("X")); + T* dx_data = dx->mutable_data(context.GetPlace()); + + const Tensor* labels = context.Input("Label"); + const T* label_data = labels->data(); + + T soft_max_up_bound = + static_cast(context.Attr("soft_max_up_bound")); + T soft_max_lower_bound = + static_cast(context.Attr("soft_max_lower_bound")); + + int64_t batch_size = x->dims()[0]; + + const framework::Tensor* dOut = + context.Input(framework::GradVarName("Y")); + + const T* dout_data = dOut->data(); + + for (int i = 0; i < batch_size; ++i) { + T sum_val = x_data[i]; + if (sum_val > soft_max_up_bound) { + sum_val = soft_max_up_bound; + } else { + if (sum_val < soft_max_lower_bound) { + sum_val = soft_max_lower_bound; + } + } + + T pred = 1.0 / (1.0 + exp(-sum_val)); + if (label_data[i] < -1.0) { + dx_data[i] = 0.0 - pred; + } else if (label_data[i] < 0.0) { + dx_data[i] = 1.0 - pred; + } else { + dx_data[i] = label_data[i] - 2.0 * pred; + } + if (sum_val >= soft_max_up_bound || sum_val <= soft_max_lower_bound) { + dx_data[i] = 0; + } + dx_data[i] *= dout_data[i] * -1; + } + } +}; } // namespace operators } // namespace paddle diff --git a/python/paddle/fluid/tests/unittests/test_teacher_student_sigmoid_loss_op.py b/python/paddle/fluid/tests/unittests/test_teacher_student_sigmoid_loss_op.py index faa5163b32075b4ba36f8c3de347076a88aa1700..26bf0fd88368ed27e142e8515ec57a6c6bebd6fa 100644 --- a/python/paddle/fluid/tests/unittests/test_teacher_student_sigmoid_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_teacher_student_sigmoid_loss_op.py @@ -27,9 +27,6 @@ class TestTeacherStudentSigmoidLossOp(OpTest): """ def setUp(self): - """ - ut - """ self.op_type = "teacher_student_sigmoid_loss" batch_size = 16 num_classes = 1 @@ -50,21 +47,13 @@ class TestTeacherStudentSigmoidLossOp(OpTest): elif label < 1.0: outs.append(max(x, 0.0) + log(1.0 + exp(-abs(x))) + \ max(x, 0.0) - x * label + log(1.0 + exp(-abs(x)))) - #print "33 python x:", x, "python label:", label, "term1:", max(x, 0.0) + log(1.0 + exp(-abs(x))), "term2:", max(x, 0.0) - x * label + log(1.0 + exp(-abs(x))) else: outs.append(max(x, 0.0) - x + log(1.0 + exp(-abs(x))) + \ max(x, 0.0) - x * (label - 1.0) + log(1.0 + exp(-abs(x)))) - #print "44 python x:", x, "python label:", label, "term1:", max(x, 0.0) - x + log(1.0 + exp(-abs(x))), "term2:", max(x, 0.0) - x * (label - 1.0) + log(1.0 + exp(-abs(x))) self.outputs = {'Y': np.array(outs)} def test_check_output(self): - """ - ut - """ self.check_output() def test_check_grad(self): - """ - ut - """ self.check_grad(["X"], "Y", numeric_grad_delta=0.005)