From 0aee1f007497a29d18e50a018648634c75761742 Mon Sep 17 00:00:00 2001 From: FlyingQianMM <245467267@qq.com> Date: Sun, 16 Jun 2019 11:33:06 +0800 Subject: [PATCH] add sigmoid focal loss operator for supporting retinanet (#17895) * test=develop add sigmoid_focal_loss for supporting retinanet * test=develop add test_layers * test=develop add API.spc * test=develop alter sigmoid_focal_loss_op.cc * test=develop alter detection.py * test=develop alter API.spec * test=develop alter round 1 * test=develop alter simooid_focal_loss * test=develop alter sigmoid_focal_loss_op.cc * test=develop alter test_layers.py * test=develop alter paddle/fluid/API.spec * test=develop alter sigmoid_focal_loss_op.cu * test=develop alter paddle/fluid/operators/detection/sigmoid_focal_loss_op.cc --- paddle/fluid/API.spec | 1 + .../fluid/operators/detection/CMakeLists.txt | 1 + .../detection/sigmoid_focal_loss_op.cc | 208 ++++++++++++++++++ .../detection/sigmoid_focal_loss_op.cu | 181 +++++++++++++++ .../detection/sigmoid_focal_loss_op.h | 128 +++++++++++ python/paddle/fluid/layers/detection.py | 69 ++++++ .../fluid/tests/unittests/test_layers.py | 22 ++ .../unittests/test_sigmoid_focal_loss_op.py | 132 +++++++++++ 8 files changed, 742 insertions(+) create mode 100644 paddle/fluid/operators/detection/sigmoid_focal_loss_op.cc create mode 100644 paddle/fluid/operators/detection/sigmoid_focal_loss_op.cu create mode 100644 paddle/fluid/operators/detection/sigmoid_focal_loss_op.h create mode 100644 python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss_op.py diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index c75698c7c82..efb06d60a2f 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -349,6 +349,7 @@ paddle.fluid.layers.detection_output (ArgSpec(args=['loc', 'scores', 'prior_box' paddle.fluid.layers.ssd_loss (ArgSpec(args=['location', 'confidence', 'gt_box', 'gt_label', 'prior_box', 'prior_box_var', 'background_label', 'overlap_threshold', 'neg_pos_ratio', 'neg_overlap', 'loc_loss_weight', 'conf_loss_weight', 'match_type', 'mining_type', 'normalize', 'sample_size'], varargs=None, keywords=None, defaults=(None, 0, 0.5, 3.0, 0.5, 1.0, 1.0, 'per_prediction', 'max_negative', True, None)), ('document', '6d5028fd09d01ab82d296adc0ea95aee')) paddle.fluid.layers.rpn_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True)), ('document', '1e164a56fe9376e18a56d22563d9f801')) paddle.fluid.layers.retinanet_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'gt_labels', 'is_crowd', 'im_info', 'num_classes', 'positive_overlap', 'negative_overlap'], varargs=None, keywords=None, defaults=(1, 0.5, 0.4)), ('document', 'fa1d1c9d5e0111684c0db705f86a2595')) +paddle.fluid.layers.sigmoid_focal_loss (ArgSpec(args=['x', 'label', 'fg_num', 'gamma', 'alpha'], varargs=None, keywords=None, defaults=(2, 0.25)), ('document', 'aeac6aae100173b3fc7f102cf3023a3d')) paddle.fluid.layers.anchor_generator (ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None)), ('document', '82b2aefeeb1b706bc4afec70928a259a')) paddle.fluid.layers.roi_perspective_transform (ArgSpec(args=['input', 'rois', 
'transformed_height', 'transformed_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1.0,)), ('document', 'd1ddc75629fedee46f82e631e22c79dc')) paddle.fluid.layers.generate_proposal_labels (ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random', 'is_cls_agnostic', 'is_cascade_rcnn'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True, False, False)), ('document', 'c0d00acf724691ff3480d4207036a722')) diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt index 2d655c3e3fc..05910699243 100644 --- a/paddle/fluid/operators/detection/CMakeLists.txt +++ b/paddle/fluid/operators/detection/CMakeLists.txt @@ -35,6 +35,7 @@ detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu) detection_library(yolov3_loss_op SRCS yolov3_loss_op.cc) detection_library(yolo_box_op SRCS yolo_box_op.cc yolo_box_op.cu) detection_library(box_decoder_and_assign_op SRCS box_decoder_and_assign_op.cc box_decoder_and_assign_op.cu) +detection_library(sigmoid_focal_loss_op SRCS sigmoid_focal_loss_op.cc sigmoid_focal_loss_op.cu) if(WITH_GPU) detection_library(generate_proposals_op SRCS generate_proposals_op.cc generate_proposals_op.cu DEPS memory cub) diff --git a/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cc b/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cc new file mode 100644 index 00000000000..50ff3cb120e --- /dev/null +++ b/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cc @@ -0,0 +1,208 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/operators/detection/sigmoid_focal_loss_op.h" +#include +#include +#include + +namespace paddle { +namespace operators { + +using framework::Tensor; + +class SigmoidFocalLossOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should not be null."); + PADDLE_ENFORCE(ctx->HasInput("FgNum"), "Input(FgNum) should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null."); + + auto x_dims = ctx->GetInputDim("X"); + auto labels_dims = ctx->GetInputDim("Label"); + auto fg_dims = ctx->GetInputDim("FgNum"); + + int rank = x_dims.size(); + PADDLE_ENFORCE_EQ(rank, labels_dims.size(), + "Input(X) and Input(Label) shall have the same rank."); + PADDLE_ENFORCE_EQ(fg_dims.size(), 1, "The rank of Input(FgNum) must be 1."); + bool check = true; + if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 || + framework::product(labels_dims) <= 0)) { + check = false; + } + + if (check) { + PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), + framework::slice_ddim(labels_dims, 0, rank - 1), + "Input(X) and Input(Label) shall have the same shape " + "except the last dimension."); + } + + PADDLE_ENFORCE_EQ(labels_dims[rank - 1], 1UL, + "The last dimension of input(Label) should be 1."); + + ctx->ShareDim("X", /*->*/ "Out"); + ctx->ShareLoD("X", /*->*/ "Out"); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType(ctx.Input("X")->type(), + ctx.device_context()); + } +}; + +class SigmoidFocalLossGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should not be null."); + PADDLE_ENFORCE(ctx->HasInput("FgNum"), "Input(FgNum) should not be null."); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), + "Input(Out@GRAD) should not be null."); + PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), + "Output(X@GRAD) should not be null."); + + auto x_dims = ctx->GetInputDim("X"); + auto labels_dims = ctx->GetInputDim("Label"); + auto fg_dims = ctx->GetInputDim("FgNum"); + auto dout_dims = ctx->GetInputDim(framework::GradVarName("Out")); + + int rank = x_dims.size(); + PADDLE_ENFORCE_EQ(rank, labels_dims.size(), + "Input(X) and Input(Label) shall have the same rank."); + PADDLE_ENFORCE_EQ(fg_dims.size(), 1, "The rank of Input(FgNum) must be 1."); + bool check = true; + if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 || + framework::product(labels_dims) <= 0)) { + check = false; + } + + if (check) { + PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), + framework::slice_ddim(labels_dims, 0, rank - 1), + "Input(X) and Input(Label) shall have the same shape."); + + PADDLE_ENFORCE_EQ(labels_dims[rank - 1], 1UL, + "The last dimension of input(Label) should be 1."); + + PADDLE_ENFORCE_EQ( + framework::slice_ddim(x_dims, 0, rank), + framework::slice_ddim(dout_dims, 0, rank), + "Input(X) and Input(Out@Grad) shall have the same shape."); + } + + ctx->SetOutputDim(framework::GradVarName("X"), x_dims); + } + 
+ protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType(ctx.Input("X")->type(), + ctx.device_context()); + } +}; + +class SigmoidFocalLossOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", + "(Tensor, default Tensor), a 2-D tensor with shape [N, D], " + "where N is the batch size and D is the number of classes " + "(excluding background). This input is a tensor of logits " + "computed by the previous operator."); + AddInput("Label", + "(Tensor, default Tensor), a 2-D tensor with shape [N, 1]. " + "This input is a tensor of probabilistic labels."); + AddInput("FgNum", + "(Tensor, default Tensor), a 1-D tensor with shape [1]. " + "This input is the number of foreground."); + AddOutput( + "Out", + "(Tensor, default Tensor), a 2-D tensor with shape [N, D]. " + "This output is the focal loss."); + AddAttr( + "gamma", + "Hyper-parameter of sigmoid focal loss op, which is to balance the " + "easy and hard examples. " + "A float scalar with default value 2.0.") + .SetDefault(2.0); + AddAttr( + "alpha", + "Hyper-parameter of sigmoid focal loss op, which is to balance the " + "positive and negative examples. " + "A float scalar with default value 0.5.") + .SetDefault(0.25); + AddComment(R"DOC( +Sigmoid Focal Loss Operator. + +Focal loss is used to address the foreground-background class imbalance existed +on the training phase of one-stage detectors. This operator computes the sigmoid +value for each element in the input tensor, after which focal loss is measured. + +The focal loss is given as follows: + +$$Loss_j = (-Label_j * alpha * \pow(1 - \sigma(X_j), gamma) * \log(\sigma(X_j)) - +(1 - Labels_j) * (1 - alpha) * \pow(\sigma(X_j), gamma) * \log(1 - \sigma(X_j))) +/ FgNum, j = 1,...,K$$ + +We know that $$\sigma(X_j) = \\frac{1}{1 + \exp(-X_j)}$$. + +)DOC"); + } +}; + +class SigmoidFocalLossGradOpDescMaker + : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + std::unique_ptr op(new framework::OpDesc()); + op->SetType("sigmoid_focal_loss_grad"); + op->SetInput("X", Input("X")); + op->SetInput("Label", Input("Label")); + op->SetInput("FgNum", Input("FgNum")); + op->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); + op->SetOutput(framework::GradVarName("X"), InputGrad("X")); + op->SetAttrMap(Attrs()); + return op; + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(sigmoid_focal_loss, ops::SigmoidFocalLossOp, + ops::SigmoidFocalLossOpMaker, + ops::SigmoidFocalLossGradOpDescMaker); +REGISTER_OPERATOR(sigmoid_focal_loss_grad, ops::SigmoidFocalLossGradOp); +REGISTER_OP_CPU_KERNEL( + sigmoid_focal_loss, + ops::SigmoidFocalLossKernel, + ops::SigmoidFocalLossKernel); +REGISTER_OP_CPU_KERNEL( + sigmoid_focal_loss_grad, + ops::SigmoidFocalLossGradKernel, + ops::SigmoidFocalLossGradKernel); diff --git a/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cu b/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cu new file mode 100644 index 00000000000..b603e2f48fe --- /dev/null +++ b/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cu @@ -0,0 +1,181 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#include "cub/cub.cuh" +#include "paddle/fluid/operators/detection/sigmoid_focal_loss_op.h" +#include "paddle/fluid/operators/math.h" +#include "paddle/fluid/platform/cuda_primitives.h" +#include "paddle/fluid/platform/hostdevice.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +static constexpr int kNumCUDAThreads = 512; +static constexpr int kNumMaxinumNumBlocks = 4096; + +static inline int NumBlocks(const int N) { + return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, + kNumMaxinumNumBlocks); +} + +#define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ + i += blockDim.x * gridDim.x) + +template +__global__ void GPUSigmoidFocalLossForward(const T *x_data, + const int *label_data, + const int *fg_num_data, + const T gamma, const T alpha, + const int num_classes, + const int limit, T *out_data) { + CUDA_1D_KERNEL_LOOP(i, limit) { + T x = x_data[i]; + int a = i / num_classes; // current sample + int d = i % num_classes; // current class + int g = label_data[a]; // target + + // check whether the input data is positive or negative + // the target classes are in range 1-81 + // and the d is in range 0-80 + T c_pos = static_cast(g == (d + 1)); + T c_neg = static_cast((g != -1) & (g != (d + 1))); + + T fg_num = static_cast((fg_num_data[0] > 1) ? fg_num_data[0] : 1); + T s_neg = (1.0 - alpha) / fg_num; + T s_pos = alpha / fg_num; + + // p = 1. / 1. + expf(-x) + T p = 1. / (1. + real_exp(-x)); + + // (1 - p)**gamma * log(p) + T term_pos = + std::pow((1. - p), gamma) * real_log(p > FLT_MIN ? p : FLT_MIN); + // p**gamma * log(1 - p) + T term_neg = + std::pow(p, gamma) * + (-1. * x * (x >= 0) - real_log(1. + real_exp(x - 2. * x * (x >= 0)))); + + out_data[i] = 0.0; + out_data[i] += -c_pos * term_pos * s_pos; + out_data[i] += -c_neg * term_neg * s_neg; + } +} + +template +__global__ void GPUSigmoidFocalLossBackward( + const T *x_data, const int *label_data, const int *fg_num_data, + const T gamma, const T alpha, const int num_classes, const T *dout_data, + const int limit, T *dx_data) { + CUDA_1D_KERNEL_LOOP(i, limit) { + T x = x_data[i]; + T dout = dout_data[i]; + + int a = i / num_classes; // current sample + int d = i % num_classes; // current class + + T fg_num = static_cast((fg_num_data[0] > 1) ? fg_num_data[0] : 1); + T s_neg = (1.0 - alpha) / fg_num; + T s_pos = alpha / fg_num; + + int g = label_data[a]; + T c_pos = static_cast(g == (d + 1)); + T c_neg = static_cast((g != -1) & (g != (d + 1))); + + T p = 1. / (1. + real_exp(-x)); + + // (1-p)**g * (1 - p - g*p*log(p)) + T term_pos = std::pow((1. - p), gamma) * + (1. - p - (p * gamma * real_log(p > FLT_MIN ? p : FLT_MIN))); + // (p**g) * (g*(1-p)*log(1-p) - p) + T term_neg = + std::pow(p, gamma) * + ((-1. * x * (x >= 0) - real_log(1. + real_exp(x - 2. * x * (x >= 0)))) * + (1. 
- p) * gamma - + p); + + dx_data[i] = 0.0; + dx_data[i] += -c_pos * s_pos * term_pos; + dx_data[i] += -c_neg * s_neg * term_neg; + dx_data[i] = dx_data[i] * dout; + } +} + +template +class GPUSigmoidFocalLossKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &context) const override { + const Tensor *X = context.Input("X"); + const Tensor *Labels = context.Input("Label"); + const Tensor *FgNum = context.Input("FgNum"); + Tensor *Out = context.Output("Out"); + T gamma = static_cast(context.Attr("gamma")); + T alpha = static_cast(context.Attr("alpha")); + auto x_dims = X->dims(); + int num_classes = static_cast(x_dims[1]); + auto out_data = Out->mutable_data(context.GetPlace()); + + auto &dev_ctx = context.cuda_device_context(); + + int limit = Out->numel(); + int blocks = NumBlocks(limit); + int threads = kNumCUDAThreads; + GPUSigmoidFocalLossForward<<>>( + X->data(), Labels->data(), FgNum->data(), gamma, alpha, + num_classes, limit, out_data); + } +}; + +template +class GPUSigmoidFocalLossGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &context) const override { + const Tensor *X = context.Input("X"); + const Tensor *Labels = context.Input("Label"); + const Tensor *FgNum = context.Input("FgNum"); + const Tensor *dOut = context.Input(framework::GradVarName("Out")); + Tensor *dX = context.Output(framework::GradVarName("X")); + auto dx_data = dX->mutable_data(context.GetPlace()); + T gamma = static_cast(context.Attr("gamma")); + T alpha = static_cast(context.Attr("alpha")); + auto x_dims = X->dims(); + int num_classes = static_cast(x_dims[1]); + + auto &dev_ctx = context.cuda_device_context(); + + int limit = dX->numel(); + int blocks = NumBlocks(limit); + int threads = kNumCUDAThreads; + GPUSigmoidFocalLossBackward<<>>( + X->data(), Labels->data(), FgNum->data(), gamma, alpha, + num_classes, dOut->data(), limit, dx_data); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL( + sigmoid_focal_loss, + ops::GPUSigmoidFocalLossKernel, + ops::GPUSigmoidFocalLossKernel); +REGISTER_OP_CUDA_KERNEL( + sigmoid_focal_loss_grad, + ops::GPUSigmoidFocalLossGradKernel, + ops::GPUSigmoidFocalLossGradKernel); diff --git a/paddle/fluid/operators/detection/sigmoid_focal_loss_op.h b/paddle/fluid/operators/detection/sigmoid_focal_loss_op.h new file mode 100644 index 00000000000..529a74e5300 --- /dev/null +++ b/paddle/fluid/operators/detection/sigmoid_focal_loss_op.h @@ -0,0 +1,128 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once +#include +#include +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +class SigmoidFocalLossKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &context) const override { + const Tensor *X = context.Input("X"); + const Tensor *Labels = context.Input("Label"); + const Tensor *FgNum = context.Input("FgNum"); + Tensor *Out = context.Output("Out"); + T gamma = static_cast(context.Attr("gamma")); + T alpha = static_cast(context.Attr("alpha")); + auto out_data = Out->mutable_data(context.GetPlace()); + int limit = Out->numel(); + auto x_data = X->data(); + auto label_data = Labels->data(); + auto fg_num_data = FgNum->data(); + auto x_dims = X->dims(); + int num_classes = static_cast(x_dims[1]); + + for (int idx = 0; idx < limit; ++idx) { + T x = x_data[idx]; + int a = idx / num_classes; // current sample + int d = idx % num_classes; // current class + int g = label_data[a]; // target + + // Check whether the input data is positive or negative + // The target classes are in range 1-81 + // and the d is in range 0-80 + T c_pos = static_cast(g == (d + 1)); + T c_neg = static_cast((g != -1) & (g != (d + 1))); + T fg_num = static_cast((fg_num_data[0] > 1) ? fg_num_data[0] : 1); + T s_neg = (1.0 - alpha) / fg_num; + T s_pos = alpha / fg_num; + + // p = 1. / 1. + expf(-x) + T p = 1. / (1. + std::exp(-x)); + + // (1 - p)**gamma * log(p) where + T term_pos = + std::pow((1. - p), gamma) * std::log(p > FLT_MIN ? p : FLT_MIN); + // p**gamma * log(1 - p) + float term_neg = + std::pow(p, gamma) * + (-1. * x * (x >= 0) - std::log(1. + std::exp(x - 2. * x * (x >= 0)))); + out_data[idx] = 0.0; + out_data[idx] += -c_pos * term_pos * s_pos; + out_data[idx] += -c_neg * term_neg * s_neg; + } + } +}; + +template +class SigmoidFocalLossGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &context) const override { + const Tensor *X = context.Input("X"); + const Tensor *Labels = context.Input("Label"); + const Tensor *FgNum = context.Input("FgNum"); + const Tensor *dOut = context.Input(framework::GradVarName("Out")); + Tensor *dX = context.Output(framework::GradVarName("X")); + auto dx_data = dX->mutable_data(context.GetPlace()); + T gamma = static_cast(context.Attr("gamma")); + T alpha = static_cast(context.Attr("alpha")); + auto x_dims = X->dims(); + int num_classes = static_cast(x_dims[1]); + + int limit = dX->numel(); + auto x_data = X->data(); + auto label_data = Labels->data(); + auto fg_num_data = FgNum->data(); + auto dout_data = dOut->data(); + for (int idx = 0; idx < limit; ++idx) { + T x = x_data[idx]; + int a = idx / num_classes; // current sample + int d = idx % num_classes; // current class + + T fg_num = static_cast((fg_num_data[0] > 1) ? fg_num_data[0] : 1); + T s_neg = static_cast((1.0 - alpha) / fg_num); + T s_pos = alpha / fg_num; + int g = label_data[a]; + + T c_pos = static_cast(g == (d + 1)); + T c_neg = static_cast((g != -1) & (g != (d + 1))); + T p = 1. / (1. + std::exp(-x)); + + // (1-p)**g * (1 - p - g*p*log(p)) + T term_pos = std::pow((1. - p), gamma) * + (1. - p - (p * gamma * std::log(p > FLT_MIN ? p : FLT_MIN))); + // (p**g) * (g*(1-p)*log(1-p) - p) + T term_neg = std::pow(p, gamma) * + ((-1. * x * (x >= 0) - + std::log(1. + std::exp(x - 2. * x * (x >= 0)))) * + (1. 
- p) * gamma - + p); + + dx_data[idx] = 0.0; + dx_data[idx] += -c_pos * s_pos * term_pos; + dx_data[idx] += -c_neg * s_neg * term_neg; + dx_data[idx] = dx_data[idx] * dout_data[idx]; + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 73ae3340b94..7b3a7faadde 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -40,6 +40,7 @@ __all__ = [ 'ssd_loss', 'rpn_target_assign', 'retinanet_target_assign', + 'sigmoid_focal_loss', 'anchor_generator', 'roi_perspective_transform', 'generate_proposal_labels', @@ -368,6 +369,74 @@ def rpn_target_assign(bbox_pred, return predicted_cls_logits, predicted_bbox_pred, target_label, target_bbox, bbox_inside_weight +def sigmoid_focal_loss(x, label, fg_num, gamma=2, alpha=0.25): + """ + **Sigmoid Focal Loss Operator.** + + Focal loss is used to address the foreground-background class imbalance existed + on the training phase of one-stage detectors. This operator computes the sigmoid + value for each element in the input tensor, after which focal loss is measured. + + The focal loss is given as followed: + + .. math:: + loss_j = (-label_j * alpha * {(1 - \\sigma(x_j))}^{gamma} * \\log(\\sigma(x_j)) - + (1 - labels_j) * (1 - alpha) * {(\sigma(x_j)}^{ gamma} * \\log(1 - \\sigma(x_j))) + / fg\_num, j = 1,...,K + + We know that + + .. math:: + \\sigma(x_j) = \\frac{1}{1 + \\exp(-x_j)} + + Args: + x(Variable): A 2-D tensor with shape [N, D], where N is the batch size and D is the number + of classes (excluding background). This input is a tensor of logits computed by the + previous operator. + label(Variable): A 2-D tensor with shape [N, 1], which is the probabilistic labels. + fg_num(Variable): A 1-D tensor with shape [1], which is the number of foreground. + + gamma(float): Hyper-parameter to balance the easy and hard examples. Default value is + set to 2.0. + alpha(float): Hyper-parameter to balance the positive and negative example. Default value + is set to 0.25. + + Returns: + out(Variable): A 2-D tensor with shape [N, D], which is the focal loss. + + Examples: + .. 
code-block:: python + + import paddle.fluid as fluid + + input = fluid.layers.data( + name='data', shape=[10,80], append_batch_size=False, dtype='float32') + label = fluid.layers.data( + name='label', shape=[10,1], append_batch_size=False, dtype='int32') + fg_num = fluid.layers.data( + name='fg_num', shape=[1], append_batch_size=False, dtype='int32') + loss = fluid.layers.sigmoid_focal_loss(x=input, + label=label, + fg_num=fg_num, + gamma=2., + alpha=0.25) + """ + + helper = LayerHelper("sigmoid_focal_loss", **locals()) + + out = helper.create_variable_for_type_inference(dtype=x.dtype) + + helper.append_op( + type="sigmoid_focal_loss", + inputs={"X": x, + "Label": label, + "FgNum": fg_num}, + attrs={"gamma": gamma, + 'alpha': alpha}, + outputs={"Out": out}) + return out + + def detection_output(loc, scores, prior_box, diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 7fbadd40c37..8c2b4259ea9 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -2071,6 +2071,28 @@ class TestBook(LayerTest): bbox_pred, cls_logits, anchor_box, anchor_var, gt_boxes, gt_labels, is_crowd, im_info, 10)) + def test_sigmoid_focal_loss(self): + with program_guard(fluid.default_main_program(), + fluid.default_startup_program()): + input = layers.data( + name='data', + shape=[10, 80], + append_batch_size=False, + dtype='float32') + label = layers.data( + name='label', + shape=[10, 1], + append_batch_size=False, + dtype='int32') + fg_num = layers.data( + name='fg_num', + shape=[1], + append_batch_size=False, + dtype='int32') + out = fluid.layers.sigmoid_focal_loss( + x=input, label=label, fg_num=fg_num, gamma=2., alpha=0.25) + return (out) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss_op.py b/python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss_op.py new file mode 100644 index 00000000000..0e846521d0a --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_sigmoid_focal_loss_op.py @@ -0,0 +1,132 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +import math +import copy +from op_test import OpTest +from paddle.fluid import core + + +def sigmoid_focal_loss_forward(x_data, label_data, fg_num_data, gamma, alpha, + num_classes): + x_data_t = copy.deepcopy(x_data) + out_data = copy.deepcopy(x_data) + x_width = len(x_data) + x_height = len(x_data[0, :]) + x_data_t = x_data_t.flatten() + out_data = out_data.flatten() + for idx in range(len(x_data_t)): + x = x_data_t[idx] + a = int(idx / num_classes) + d = int(idx % num_classes) + label = label_data[a] + c_pos = float((int(label) == int(d + 1))) + c_neg = float(((int(label) != -1) & (int(label) != (d + 1)))) + fg_num = max(fg_num_data, 1) + z_neg = (1.0 - alpha) / fg_num + z_pos = alpha / fg_num + + p = 1. / (1. 
+ math.exp(-x)) + FLT_MIN = 1.175494351e-38 + term_pos = math.pow((1. - p), gamma) * math.log(max(FLT_MIN, p)) + term_neg = math.pow(p, gamma) * ( + -1. * x * (x >= 0) - math.log(1. + math.exp(x - 2. * x * (x >= 0)))) + out_data[idx] = 0.0 + out_data[idx] += -c_pos * term_pos * z_pos + out_data[idx] += -c_neg * term_neg * z_neg + + out_data = out_data.reshape(x_width, x_height) + return out_data + + +class TestSigmoidFocalLossOp1(OpTest): + def set_argument(self): + self.num_anchors = 10 + self.num_classes = 10 + self.gamma = 2.0 + self.alpha = 0.25 + + def setUp(self): + self.set_argument() + + dims = (self.num_anchors, self.num_classes) + X = np.random.standard_normal(dims).astype("float32") + L = np.random.randint(0, self.num_classes + 1, + (dims[0], 1)).astype("int32") + F = np.zeros(1) + F[0] = len(np.where(L > 0)[0]) + F = F.astype("int32") + + self.op_type = "sigmoid_focal_loss" + self.inputs = { + 'X': X, + 'Label': L, + 'FgNum': F, + } + self.attrs = { + 'gamma': self.gamma, + 'alpha': self.alpha, + } + loss = sigmoid_focal_loss_forward( + self.inputs['X'], self.inputs['Label'], self.inputs['FgNum'], + self.gamma, self.alpha, self.num_classes) + self.outputs = {'Out': loss.astype('float32')} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +@unittest.skipIf(not core.is_compiled_with_cuda(), + "core is not compiled with CUDA") +class TestSigmoidFocalLossOp2(TestSigmoidFocalLossOp1): + def test_check_output(self): + place = core.CUDAPlace(0) + self.check_output_with_place(place, atol=2e-3) + + def test_check_grad(self): + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, ['X'], 'Out', max_relative_error=0.002) + + +class TestSigmoidFocalLossOp3(TestSigmoidFocalLossOp1): + def set_argument(self): + self.num_anchors = 200 + self.num_classes = 10 + self.gamma = 1.0 + self.alpha = 0.5 + + +@unittest.skipIf(not core.is_compiled_with_cuda(), + "core is not compiled with CUDA") +class TestSigmoidFocalLossOp4(TestSigmoidFocalLossOp3): + def test_check_output(self): + place = core.CUDAPlace(0) + self.check_output_with_place(place, atol=2e-3) + + def test_check_grad(self): + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, ['X'], 'Out', max_relative_error=0.002) + + +if __name__ == '__main__': + unittest.main() -- GitLab
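As a reference for the loss formula documented in the operator comment and the Python docstring: the Label input holds integer class ids in the range [0, D] (0 marks background, and -1 marks ignored samples inside the kernels), not per-class probabilities, and the positive/negative indicator terms are derived from those ids. The NumPy sketch below is a hedged, vectorized mirror of the element-wise computation in the CPU/GPU kernels of this patch; the function name and the 1e-38 clamp (standing in for FLT_MIN) are illustrative choices, not part of the operator itself.

.. code-block:: python

    import numpy as np

    def sigmoid_focal_loss_np(x, label, fg_num, gamma=2.0, alpha=0.25):
        # x:      [N, D] logits, D classes excluding background
        # label:  [N, 1] integer class ids in [0, D]; 0 means background
        # fg_num: number of foreground samples, clipped to at least 1
        N, D = x.shape
        fg_num = max(float(fg_num), 1.0)
        p = 1.0 / (1.0 + np.exp(-x))  # sigmoid(x)
        # class d (0-based) is the positive class for sample a iff label[a] == d + 1
        classes = np.arange(1, D + 1).reshape(1, D)
        c_pos = (label == classes).astype(x.dtype)
        c_neg = ((label != -1) & (label != classes)).astype(x.dtype)
        # focal terms; the real kernels compute log(1 - p) in a numerically stable form
        term_pos = np.power(1.0 - p, gamma) * np.log(np.maximum(p, 1e-38))
        term_neg = np.power(p, gamma) * np.log(np.maximum(1.0 - p, 1e-38))
        return -(alpha * c_pos * term_pos + (1.0 - alpha) * c_neg * term_neg) / fg_num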
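The docstring example only builds the layer. A minimal end-to-end sketch of running it is given below, assuming the fluid 1.x Executor workflow; the feed names and shapes follow the docstring and the unit test, and the random input data is purely illustrative.

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    num_anchors, num_classes = 10, 80
    x = fluid.layers.data(name='data', shape=[num_anchors, num_classes],
                          append_batch_size=False, dtype='float32')
    label = fluid.layers.data(name='label', shape=[num_anchors, 1],
                              append_batch_size=False, dtype='int32')
    fg_num = fluid.layers.data(name='fg_num', shape=[1],
                               append_batch_size=False, dtype='int32')
    loss = fluid.layers.sigmoid_focal_loss(x=x, label=label, fg_num=fg_num,
                                           gamma=2., alpha=0.25)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())

    # labels in [0, num_classes]; fg_num counts labels > 0, clipped to at least 1
    logits = np.random.randn(num_anchors, num_classes).astype('float32')
    labels = np.random.randint(0, num_classes + 1,
                               (num_anchors, 1)).astype('int32')
    fg = np.array([max(int((labels > 0).sum()), 1)], dtype='int32')
    out, = exe.run(feed={'data': logits, 'label': labels, 'fg_num': fg},
                   fetch_list=[loss])
    print(out.shape)  # (10, 80): per-anchor, per-class focal loss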