diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index 71fc059728956b6178572a0dd8dbae85327c34fd..ce22f099447785f5324009c63dc425da0a3332b2 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -571,12 +571,17 @@ class OperatorWithKernel : public OperatorBase { if (has_phi_kernel) { return true; } else { - auto& op_kernels = OperatorWithKernel::AllOpKernels().at(type_); - return std::any_of( - op_kernels.begin(), op_kernels.end(), - [](OpKernelMap::const_reference kern_pair) { - return platform::is_gpu_place(kern_pair.first.place_); - }); + auto kernel_iter = OperatorWithKernel::AllOpKernels().find(type_); + if (kernel_iter == OperatorWithKernel::AllOpKernels().end()) { + return false; + } else { + auto& op_kernels = kernel_iter->second; + return std::any_of( + op_kernels.begin(), op_kernels.end(), + [](OpKernelMap::const_reference kern_pair) { + return platform::is_gpu_place(kern_pair.first.place_); + }); + } } } diff --git a/paddle/fluid/operators/detection/yolov3_loss_op.cc b/paddle/fluid/operators/detection/yolov3_loss_op.cc index ffe5a4fccd66d88da0ab007ac98899d3c388aa18..21044734ca80170dacb501b588098830d75f2af2 100644 --- a/paddle/fluid/operators/detection/yolov3_loss_op.cc +++ b/paddle/fluid/operators/detection/yolov3_loss_op.cc @@ -9,10 +9,12 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/operators/detection/yolov3_loss_op.h" #include +#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/imperative/type_defs.h" +#include "paddle/phi/core/infermeta_utils.h" +#include "paddle/phi/infermeta/multiary.h" namespace paddle { namespace operators { @@ -22,127 +24,6 @@ using framework::Tensor; class Yolov3LossOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Yolov3LossOp"); - OP_INOUT_CHECK(ctx->HasInput("GTBox"), "Input", "GTBox", "Yolov3LossOp"); - OP_INOUT_CHECK(ctx->HasInput("GTLabel"), "Input", "GTLabel", - "Yolov3LossOp"); - OP_INOUT_CHECK(ctx->HasOutput("Loss"), "Output", "Loss", "Yolov3LossOp"); - OP_INOUT_CHECK(ctx->HasOutput("ObjectnessMask"), "Output", "ObjectnessMask", - "Yolov3LossOp"); - OP_INOUT_CHECK(ctx->HasOutput("GTMatchMask"), "Output", "GTMatchMask", - "Yolov3LossOp"); - - auto dim_x = ctx->GetInputDim("X"); - auto dim_gtbox = ctx->GetInputDim("GTBox"); - auto dim_gtlabel = ctx->GetInputDim("GTLabel"); - auto anchors = ctx->Attrs().Get>("anchors"); - int anchor_num = anchors.size() / 2; - auto anchor_mask = ctx->Attrs().Get>("anchor_mask"); - int mask_num = anchor_mask.size(); - auto class_num = ctx->Attrs().Get("class_num"); - - PADDLE_ENFORCE_EQ(dim_x.size(), 4, - platform::errors::InvalidArgument( - "Input(X) should be a 4-D tensor. But received " - "X dimension size(%s)", - dim_x.size())); - PADDLE_ENFORCE_EQ(dim_x[2], dim_x[3], - platform::errors::InvalidArgument( - "Input(X) dim[3] and dim[4] should be euqal." - "But received dim[3](%s) != dim[4](%s)", - dim_x[2], dim_x[3])); - PADDLE_ENFORCE_EQ( - dim_x[1], mask_num * (5 + class_num), - platform::errors::InvalidArgument( - "Input(X) dim[1] should be equal to (anchor_mask_number * (5 " - "+ class_num))." 
- "But received dim[1](%s) != (anchor_mask_number * " - "(5+class_num)(%s).", - dim_x[1], mask_num * (5 + class_num))); - PADDLE_ENFORCE_EQ(dim_gtbox.size(), 3, - platform::errors::InvalidArgument( - "Input(GTBox) should be a 3-D tensor, but " - "received gtbox dimension size(%s)", - dim_gtbox.size())); - PADDLE_ENFORCE_EQ(dim_gtbox[2], 4, - platform::errors::InvalidArgument( - "Input(GTBox) dim[2] should be 4", - "But receive dim[2](%s) != 5. ", dim_gtbox[2])); - PADDLE_ENFORCE_EQ( - dim_gtlabel.size(), 2, - platform::errors::InvalidArgument( - "Input(GTLabel) should be a 2-D tensor," - "But received Input(GTLabel) dimension size(%s) != 2.", - dim_gtlabel.size())); - PADDLE_ENFORCE_EQ( - dim_gtlabel[0], dim_gtbox[0], - platform::errors::InvalidArgument( - "Input(GTBox) dim[0] and Input(GTLabel) dim[0] should be same," - "But received Input(GTLabel) dim[0](%s) != " - "Input(GTBox) dim[0](%s)", - dim_gtlabel[0], dim_gtbox[0])); - PADDLE_ENFORCE_EQ( - dim_gtlabel[1], dim_gtbox[1], - platform::errors::InvalidArgument( - "Input(GTBox) and Input(GTLabel) dim[1] should be same," - "But received Input(GTBox) dim[1](%s) != Input(GTLabel) " - "dim[1](%s)", - dim_gtbox[1], dim_gtlabel[1])); - PADDLE_ENFORCE_GT(anchors.size(), 0, - platform::errors::InvalidArgument( - "Attr(anchors) length should be greater then 0." - "But received anchors length(%s)", - anchors.size())); - PADDLE_ENFORCE_EQ(anchors.size() % 2, 0, - platform::errors::InvalidArgument( - "Attr(anchors) length should be even integer." - "But received anchors length(%s)", - anchors.size())); - for (size_t i = 0; i < anchor_mask.size(); i++) { - PADDLE_ENFORCE_LT( - anchor_mask[i], anchor_num, - platform::errors::InvalidArgument( - "Attr(anchor_mask) should not crossover Attr(anchors)." - "But received anchor_mask[i](%s) > anchor_num(%s)", - anchor_mask[i], anchor_num)); - } - PADDLE_ENFORCE_GT(class_num, 0, - platform::errors::InvalidArgument( - "Attr(class_num) should be an integer greater then 0." 
- "But received class_num(%s) < 0", - class_num)); - - if (ctx->HasInput("GTScore")) { - auto dim_gtscore = ctx->GetInputDim("GTScore"); - PADDLE_ENFORCE_EQ(dim_gtscore.size(), 2, - platform::errors::InvalidArgument( - "Input(GTScore) should be a 2-D tensor" - "But received GTScore dimension(%s)", - dim_gtbox.size())); - PADDLE_ENFORCE_EQ( - dim_gtscore[0], dim_gtbox[0], - platform::errors::InvalidArgument( - "Input(GTBox) and Input(GTScore) dim[0] should be same" - "But received GTBox dim[0](%s) != GTScore dim[0](%s)", - dim_gtbox[0], dim_gtscore[0])); - PADDLE_ENFORCE_EQ( - dim_gtscore[1], dim_gtbox[1], - platform::errors::InvalidArgument( - "Input(GTBox) and Input(GTScore) dim[1] should be same" - "But received GTBox dim[1](%s) != GTScore dim[1](%s)", - dim_gtscore[1], dim_gtbox[1])); - } - - std::vector dim_out({dim_x[0]}); - ctx->SetOutputDim("Loss", phi::make_ddim(dim_out)); - - std::vector dim_obj_mask({dim_x[0], mask_num, dim_x[2], dim_x[3]}); - ctx->SetOutputDim("ObjectnessMask", phi::make_ddim(dim_obj_mask)); - - std::vector dim_gt_match_mask({dim_gtbox[0], dim_gtbox[1]}); - ctx->SetOutputDim("GTMatchMask", phi::make_ddim(dim_gt_match_mask)); - } protected: framework::OpKernelType GetExpectedKernelType( @@ -347,11 +228,10 @@ class Yolov3LossGradMaker : public framework::SingleGradOpMaker { } // namespace paddle namespace ops = paddle::operators; +DECLARE_INFER_SHAPE_FUNCTOR(yolov3_loss, Yolov3LossInferShapeFunctor, + PD_INFER_META(phi::Yolov3LossInferMeta)); REGISTER_OPERATOR(yolov3_loss, ops::Yolov3LossOp, ops::Yolov3LossOpMaker, ops::Yolov3LossGradMaker, - ops::Yolov3LossGradMaker); + ops::Yolov3LossGradMaker, + Yolov3LossInferShapeFunctor); REGISTER_OPERATOR(yolov3_loss_grad, ops::Yolov3LossOpGrad); -REGISTER_OP_CPU_KERNEL(yolov3_loss, ops::Yolov3LossKernel, - ops::Yolov3LossKernel); -REGISTER_OP_CPU_KERNEL(yolov3_loss_grad, ops::Yolov3LossGradKernel, - ops::Yolov3LossGradKernel); diff --git a/paddle/fluid/operators/detection/yolov3_loss_op.h 
b/paddle/fluid/operators/detection/yolov3_loss_op.h deleted file mode 100644 index fa1700b22d9ead9192bd9005ef98b3c8e0c40baf..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/detection/yolov3_loss_op.h +++ /dev/null @@ -1,506 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#pragma once -#include -#include -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -using Tensor = framework::Tensor; -template -using EigenTensor = framework::EigenTensor; -template -using EigenVector = framework::EigenVector; - -template -static inline bool LessEqualZero(T x) { - return x < 1e-6; -} - -template -static T SigmoidCrossEntropy(T x, T label) { - return (x > 0 ? x : 0.0) - x * label + std::log(1.0 + std::exp(-std::abs(x))); -} - -template -static T L1Loss(T x, T y) { - return std::abs(y - x); -} - -template -static T SigmoidCrossEntropyGrad(T x, T label) { - return 1.0 / (1.0 + std::exp(-x)) - label; -} - -template -static T L1LossGrad(T x, T y) { - return x > y ? 
1.0 : -1.0; -} - -static int GetMaskIndex(std::vector mask, int val) { - for (size_t i = 0; i < mask.size(); i++) { - if (mask[i] == val) { - return i; - } - } - return -1; -} - -template -struct Box { - T x, y, w, h; -}; - -template -static inline T sigmoid(T x) { - return 1.0 / (1.0 + std::exp(-x)); -} - -template -static inline Box GetYoloBox(const T* x, std::vector anchors, int i, - int j, int an_idx, int grid_size, - int input_size, int index, int stride, - float scale, float bias) { - Box b; - b.x = (i + sigmoid(x[index]) * scale + bias) / grid_size; - b.y = (j + sigmoid(x[index + stride]) * scale + bias) / grid_size; - b.w = std::exp(x[index + 2 * stride]) * anchors[2 * an_idx] / input_size; - b.h = std::exp(x[index + 3 * stride]) * anchors[2 * an_idx + 1] / input_size; - return b; -} - -template -static inline Box GetGtBox(const T* gt, int batch, int max_boxes, int idx) { - Box b; - b.x = gt[(batch * max_boxes + idx) * 4]; - b.y = gt[(batch * max_boxes + idx) * 4 + 1]; - b.w = gt[(batch * max_boxes + idx) * 4 + 2]; - b.h = gt[(batch * max_boxes + idx) * 4 + 3]; - return b; -} - -template -static inline T BoxOverlap(T c1, T w1, T c2, T w2) { - T l1 = c1 - w1 / 2.0; - T l2 = c2 - w2 / 2.0; - T left = l1 > l2 ? l1 : l2; - T r1 = c1 + w1 / 2.0; - T r2 = c2 + w2 / 2.0; - T right = r1 < r2 ? r1 : r2; - return right - left; -} - -template -static inline T CalcBoxIoU(Box b1, Box b2) { - T w = BoxOverlap(b1.x, b1.w, b2.x, b2.w); - T h = BoxOverlap(b1.y, b1.h, b2.y, b2.h); - T inter_area = (w < 0 || h < 0) ? 
0.0 : w * h; - T union_area = b1.w * b1.h + b2.w * b2.h - inter_area; - return inter_area / union_area; -} - -static inline int GetEntryIndex(int batch, int an_idx, int hw_idx, int an_num, - int an_stride, int stride, int entry) { - return (batch * an_num + an_idx) * an_stride + entry * stride + hw_idx; -} - -template -static void CalcBoxLocationLoss(T* loss, const T* input, Box gt, - std::vector anchors, int an_idx, - int box_idx, int gi, int gj, int grid_size, - int input_size, int stride, T score) { - T tx = gt.x * grid_size - gi; - T ty = gt.y * grid_size - gj; - T tw = std::log(gt.w * input_size / anchors[2 * an_idx]); - T th = std::log(gt.h * input_size / anchors[2 * an_idx + 1]); - - T scale = (2.0 - gt.w * gt.h) * score; - loss[0] += SigmoidCrossEntropy(input[box_idx], tx) * scale; - loss[0] += SigmoidCrossEntropy(input[box_idx + stride], ty) * scale; - loss[0] += L1Loss(input[box_idx + 2 * stride], tw) * scale; - loss[0] += L1Loss(input[box_idx + 3 * stride], th) * scale; -} - -template -static void CalcBoxLocationLossGrad(T* input_grad, const T loss, const T* input, - Box gt, std::vector anchors, - int an_idx, int box_idx, int gi, int gj, - int grid_size, int input_size, int stride, - T score) { - T tx = gt.x * grid_size - gi; - T ty = gt.y * grid_size - gj; - T tw = std::log(gt.w * input_size / anchors[2 * an_idx]); - T th = std::log(gt.h * input_size / anchors[2 * an_idx + 1]); - - T scale = (2.0 - gt.w * gt.h) * score; - input_grad[box_idx] = - SigmoidCrossEntropyGrad(input[box_idx], tx) * scale * loss; - input_grad[box_idx + stride] = - SigmoidCrossEntropyGrad(input[box_idx + stride], ty) * scale * loss; - input_grad[box_idx + 2 * stride] = - L1LossGrad(input[box_idx + 2 * stride], tw) * scale * loss; - input_grad[box_idx + 3 * stride] = - L1LossGrad(input[box_idx + 3 * stride], th) * scale * loss; -} - -template -static inline void CalcLabelLoss(T* loss, const T* input, const int index, - const int label, const int class_num, - const int stride, 
const T pos, const T neg, - T score) { - for (int i = 0; i < class_num; i++) { - T pred = input[index + i * stride]; - loss[0] += SigmoidCrossEntropy(pred, (i == label) ? pos : neg) * score; - } -} - -template -static inline void CalcLabelLossGrad(T* input_grad, const T loss, - const T* input, const int index, - const int label, const int class_num, - const int stride, const T pos, const T neg, - T score) { - for (int i = 0; i < class_num; i++) { - T pred = input[index + i * stride]; - input_grad[index + i * stride] = - SigmoidCrossEntropyGrad(pred, (i == label) ? pos : neg) * score * - loss; - } -} - -template -static inline void CalcObjnessLoss(T* loss, const T* input, const T* objness, - const int n, const int an_num, const int h, - const int w, const int stride, - const int an_stride) { - for (int i = 0; i < n; i++) { - for (int j = 0; j < an_num; j++) { - for (int k = 0; k < h; k++) { - for (int l = 0; l < w; l++) { - T obj = objness[k * w + l]; - if (obj > 1e-5) { - // positive sample: obj = mixup score - loss[i] += SigmoidCrossEntropy(input[k * w + l], 1.0) * obj; - } else if (obj > -0.5) { - // negetive sample: obj = 0 - loss[i] += SigmoidCrossEntropy(input[k * w + l], 0.0); - } - } - } - objness += stride; - input += an_stride; - } - } -} - -template -static inline void CalcObjnessLossGrad(T* input_grad, const T* loss, - const T* input, const T* objness, - const int n, const int an_num, - const int h, const int w, - const int stride, const int an_stride) { - for (int i = 0; i < n; i++) { - for (int j = 0; j < an_num; j++) { - for (int k = 0; k < h; k++) { - for (int l = 0; l < w; l++) { - T obj = objness[k * w + l]; - if (obj > 1e-5) { - input_grad[k * w + l] = - SigmoidCrossEntropyGrad(input[k * w + l], 1.0) * obj * - loss[i]; - } else if (obj > -0.5) { - input_grad[k * w + l] = - SigmoidCrossEntropyGrad(input[k * w + l], 0.0) * loss[i]; - } - } - } - objness += stride; - input += an_stride; - input_grad += an_stride; - } - } -} - -template -static void 
inline GtValid(bool* valid, const T* gtbox, const int n, - const int b) { - for (int i = 0; i < n; i++) { - for (int j = 0; j < b; j++) { - if (LessEqualZero(gtbox[j * 4 + 2]) || LessEqualZero(gtbox[j * 4 + 3])) { - valid[j] = false; - } else { - valid[j] = true; - } - } - valid += b; - gtbox += b * 4; - } -} - -template -class Yolov3LossKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* gt_box = ctx.Input("GTBox"); - auto* gt_label = ctx.Input("GTLabel"); - auto* gt_score = ctx.Input("GTScore"); - auto* loss = ctx.Output("Loss"); - auto* objness_mask = ctx.Output("ObjectnessMask"); - auto* gt_match_mask = ctx.Output("GTMatchMask"); - auto anchors = ctx.Attr>("anchors"); - auto anchor_mask = ctx.Attr>("anchor_mask"); - int class_num = ctx.Attr("class_num"); - float ignore_thresh = ctx.Attr("ignore_thresh"); - int downsample_ratio = ctx.Attr("downsample_ratio"); - bool use_label_smooth = ctx.Attr("use_label_smooth"); - float scale = ctx.Attr("scale_x_y"); - float bias = -0.5 * (scale - 1.); - - const int n = input->dims()[0]; - const int h = input->dims()[2]; - const int w = input->dims()[3]; - const int an_num = anchors.size() / 2; - const int mask_num = anchor_mask.size(); - const int b = gt_box->dims()[1]; - int input_size = downsample_ratio * h; - - const int stride = h * w; - const int an_stride = (class_num + 5) * stride; - - T label_pos = 1.0; - T label_neg = 0.0; - if (use_label_smooth) { - T smooth_weight = std::min(1.0 / static_cast(class_num), 1.0 / 40); - label_pos = 1.0 - smooth_weight; - label_neg = smooth_weight; - } - - const T* input_data = input->data(); - const T* gt_box_data = gt_box->data(); - const int* gt_label_data = gt_label->data(); - T* loss_data = loss->mutable_data({n}, ctx.GetPlace()); - memset(loss_data, 0, loss->numel() * sizeof(T)); - T* obj_mask_data = - objness_mask->mutable_data({n, mask_num, h, w}, ctx.GetPlace()); - 
memset(obj_mask_data, 0, objness_mask->numel() * sizeof(T)); - int* gt_match_mask_data = - gt_match_mask->mutable_data({n, b}, ctx.GetPlace()); - - const T* gt_score_data; - Tensor gtscore; - if (!gt_score) { - gtscore.mutable_data({n, b}, ctx.GetPlace()); - phi::funcs::SetConstant()( - ctx.template device_context(), >score, - static_cast(1.0)); - gt_score = >score; - gt_score_data = gtscore.data(); - } else { - gt_score_data = gt_score->data(); - } - - // calc valid gt box mask, avoid calc duplicately in following code - Tensor gt_valid_mask; - bool* gt_valid_mask_data = - gt_valid_mask.mutable_data({n, b}, ctx.GetPlace()); - GtValid(gt_valid_mask_data, gt_box_data, n, b); - - for (int i = 0; i < n; i++) { - for (int j = 0; j < mask_num; j++) { - for (int k = 0; k < h; k++) { - for (int l = 0; l < w; l++) { - // each predict box find a best match gt box, if overlap is bigger - // then ignore_thresh, ignore the objectness loss. - int box_idx = - GetEntryIndex(i, j, k * w + l, mask_num, an_stride, stride, 0); - Box pred = - GetYoloBox(input_data, anchors, l, k, anchor_mask[j], h, - input_size, box_idx, stride, scale, bias); - T best_iou = 0; - for (int t = 0; t < b; t++) { - if (!gt_valid_mask_data[i * b + t]) { - continue; - } - Box gt = GetGtBox(gt_box_data, i, b, t); - T iou = CalcBoxIoU(pred, gt); - if (iou > best_iou) { - best_iou = iou; - } - } - - // If best IoU is bigger then ignore_thresh, - // ignore the objectness loss. - if (best_iou > ignore_thresh) { - int obj_idx = (i * mask_num + j) * stride + k * w + l; - obj_mask_data[obj_idx] = static_cast(-1); - } - // all losses should be calculated if best IoU - // is bigger then truth thresh, but currently, - // truth thresh is an unreachable value as 1.0. 
- } - } - } - for (int t = 0; t < b; t++) { - if (!gt_valid_mask_data[i * b + t]) { - gt_match_mask_data[i * b + t] = -1; - continue; - } - Box gt = GetGtBox(gt_box_data, i, b, t); - int gi = static_cast(gt.x * w); - int gj = static_cast(gt.y * h); - Box gt_shift = gt; - gt_shift.x = 0.0; - gt_shift.y = 0.0; - T best_iou = 0.0; - int best_n = 0; - // each gt box find a best match anchor box as positive sample, - // for positive sample, all losses should be calculated, and for - // other samples, only objectness loss is required. - for (int an_idx = 0; an_idx < an_num; an_idx++) { - Box an_box; - an_box.x = 0.0; - an_box.y = 0.0; - an_box.w = anchors[2 * an_idx] / static_cast(input_size); - an_box.h = anchors[2 * an_idx + 1] / static_cast(input_size); - float iou = CalcBoxIoU(an_box, gt_shift); - if (iou > best_iou) { - best_iou = iou; - best_n = an_idx; - } - } - - int mask_idx = GetMaskIndex(anchor_mask, best_n); - gt_match_mask_data[i * b + t] = mask_idx; - if (mask_idx >= 0) { - T score = gt_score_data[i * b + t]; - int box_idx = GetEntryIndex(i, mask_idx, gj * w + gi, mask_num, - an_stride, stride, 0); - CalcBoxLocationLoss(loss_data + i, input_data, gt, anchors, best_n, - box_idx, gi, gj, h, input_size, stride, score); - - int obj_idx = (i * mask_num + mask_idx) * stride + gj * w + gi; - obj_mask_data[obj_idx] = score; - - int label = gt_label_data[i * b + t]; - int label_idx = GetEntryIndex(i, mask_idx, gj * w + gi, mask_num, - an_stride, stride, 5); - CalcLabelLoss(loss_data + i, input_data, label_idx, label, - class_num, stride, label_pos, label_neg, score); - } - } - } - - CalcObjnessLoss(loss_data, input_data + 4 * stride, obj_mask_data, n, - mask_num, h, w, stride, an_stride); - } -}; - -template -class Yolov3LossGradKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* gt_box = ctx.Input("GTBox"); - auto* gt_label = ctx.Input("GTLabel"); - auto* 
gt_score = ctx.Input("GTScore"); - auto* input_grad = ctx.Output(framework::GradVarName("X")); - auto* loss_grad = ctx.Input(framework::GradVarName("Loss")); - auto* objness_mask = ctx.Input("ObjectnessMask"); - auto* gt_match_mask = ctx.Input("GTMatchMask"); - auto anchors = ctx.Attr>("anchors"); - auto anchor_mask = ctx.Attr>("anchor_mask"); - int class_num = ctx.Attr("class_num"); - int downsample_ratio = ctx.Attr("downsample_ratio"); - bool use_label_smooth = ctx.Attr("use_label_smooth"); - - const int n = input_grad->dims()[0]; - const int c = input_grad->dims()[1]; - const int h = input_grad->dims()[2]; - const int w = input_grad->dims()[3]; - const int mask_num = anchor_mask.size(); - const int b = gt_match_mask->dims()[1]; - int input_size = downsample_ratio * h; - - const int stride = h * w; - const int an_stride = (class_num + 5) * stride; - - T label_pos = 1.0; - T label_neg = 0.0; - if (use_label_smooth) { - T smooth_weight = std::min(1.0 / static_cast(class_num), 1.0 / 40); - label_pos = 1.0 - smooth_weight; - label_neg = smooth_weight; - } - - const T* input_data = input->data(); - const T* gt_box_data = gt_box->data(); - const int* gt_label_data = gt_label->data(); - const T* loss_grad_data = loss_grad->data(); - const T* obj_mask_data = objness_mask->data(); - const int* gt_match_mask_data = gt_match_mask->data(); - T* input_grad_data = - input_grad->mutable_data({n, c, h, w}, ctx.GetPlace()); - memset(input_grad_data, 0, input_grad->numel() * sizeof(T)); - - const T* gt_score_data; - Tensor gtscore; - if (!gt_score) { - gtscore.mutable_data({n, b}, ctx.GetPlace()); - phi::funcs::SetConstant()( - ctx.template device_context(), >score, - static_cast(1.0)); - gt_score = >score; - gt_score_data = gtscore.data(); - } else { - gt_score_data = gt_score->data(); - } - - for (int i = 0; i < n; i++) { - for (int t = 0; t < b; t++) { - int mask_idx = gt_match_mask_data[i * b + t]; - if (mask_idx >= 0) { - T score = gt_score_data[i * b + t]; - Box gt = 
GetGtBox(gt_box_data, i, b, t); - int gi = static_cast(gt.x * w); - int gj = static_cast(gt.y * h); - - int box_idx = GetEntryIndex(i, mask_idx, gj * w + gi, mask_num, - an_stride, stride, 0); - CalcBoxLocationLossGrad(input_grad_data, loss_grad_data[i], - input_data, gt, anchors, - anchor_mask[mask_idx], box_idx, gi, gj, h, - input_size, stride, score); - - int label = gt_label_data[i * b + t]; - int label_idx = GetEntryIndex(i, mask_idx, gj * w + gi, mask_num, - an_stride, stride, 5); - CalcLabelLossGrad(input_grad_data, loss_grad_data[i], input_data, - label_idx, label, class_num, stride, label_pos, - label_neg, score); - } - } - } - - CalcObjnessLossGrad(input_grad_data + 4 * stride, loss_grad_data, - input_data + 4 * stride, obj_mask_data, n, mask_num, - h, w, stride, an_stride); - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc index 1e261abbcc28d588d8ca38151a3ba2700d886c49..f196744c0411e9e0c3d6194c675c7ae6cc8a25ca 100644 --- a/paddle/phi/infermeta/multiary.cc +++ b/paddle/phi/infermeta/multiary.cc @@ -1229,6 +1229,153 @@ void WhereInferMeta(const MetaTensor& condition, out->share_meta(x); } +void Yolov3LossInferMeta(const MetaTensor& x, + const MetaTensor& gt_box, + const MetaTensor& gt_label, + const paddle::optional gt_score, + const std::vector& anchors, + const std::vector& anchor_mask, + int class_num, + float ignore_thresh, + int downsample_ratio, + bool use_label_smooth, + float scale_x_y, + MetaTensor* loss, + MetaTensor* objectness_mask, + MetaTensor* gt_match_mask) { + auto dim_x = x.dims(); + auto dim_gtbox = gt_box.dims(); + auto dim_gtlabel = gt_label.dims(); + int anchor_num = anchors.size() / 2; + int mask_num = anchor_mask.size(); + + PADDLE_ENFORCE_EQ(dim_x.size(), + 4, + phi::errors::InvalidArgument( + "Input(X) should be a 4-D tensor. 
But received " + "X dimension size(%s)", + dim_x.size())); + PADDLE_ENFORCE_EQ( + dim_x[2], + dim_x[3], + phi::errors::InvalidArgument("Input(X) dim[3] and dim[4] should be euqal." + "But received dim[3](%s) != dim[4](%s)", + dim_x[2], + dim_x[3])); + PADDLE_ENFORCE_EQ( + dim_x[1], + mask_num * (5 + class_num), + phi::errors::InvalidArgument( + "Input(X) dim[1] should be equal to (anchor_mask_number * (5 " + "+ class_num))." + "But received dim[1](%s) != (anchor_mask_number * " + "(5+class_num)(%s).", + dim_x[1], + mask_num * (5 + class_num))); + PADDLE_ENFORCE_EQ( + dim_gtbox.size(), + 3, + phi::errors::InvalidArgument("Input(GTBox) should be a 3-D tensor, but " + "received gtbox dimension size(%s)", + dim_gtbox.size())); + PADDLE_ENFORCE_EQ( + dim_gtbox[2], + 4, + phi::errors::InvalidArgument("Input(GTBox) dim[2] should be 4", + "But receive dim[2](%s) != 5. ", + dim_gtbox[2])); + PADDLE_ENFORCE_EQ(dim_gtlabel.size(), + 2, + phi::errors::InvalidArgument( + "Input(GTLabel) should be a 2-D tensor," + "But received Input(GTLabel) dimension size(%s) != 2.", + dim_gtlabel.size())); + PADDLE_ENFORCE_EQ( + dim_gtlabel[0], + dim_gtbox[0], + phi::errors::InvalidArgument( + "Input(GTBox) dim[0] and Input(GTLabel) dim[0] should be same," + "But received Input(GTLabel) dim[0](%s) != " + "Input(GTBox) dim[0](%s)", + dim_gtlabel[0], + dim_gtbox[0])); + PADDLE_ENFORCE_EQ( + dim_gtlabel[1], + dim_gtbox[1], + phi::errors::InvalidArgument( + "Input(GTBox) and Input(GTLabel) dim[1] should be same," + "But received Input(GTBox) dim[1](%s) != Input(GTLabel) " + "dim[1](%s)", + dim_gtbox[1], + dim_gtlabel[1])); + PADDLE_ENFORCE_GT(anchors.size(), + 0, + phi::errors::InvalidArgument( + "Attr(anchors) length should be greater then 0." + "But received anchors length(%s)", + anchors.size())); + PADDLE_ENFORCE_EQ(anchors.size() % 2, + 0, + phi::errors::InvalidArgument( + "Attr(anchors) length should be even integer." 
+ "But received anchors length(%s)", + anchors.size())); + for (size_t i = 0; i < anchor_mask.size(); i++) { + PADDLE_ENFORCE_LT( + anchor_mask[i], + anchor_num, + phi::errors::InvalidArgument( + "Attr(anchor_mask) should not crossover Attr(anchors)." + "But received anchor_mask[i](%s) > anchor_num(%s)", + anchor_mask[i], + anchor_num)); + } + PADDLE_ENFORCE_GT(class_num, + 0, + phi::errors::InvalidArgument( + "Attr(class_num) should be an integer greater then 0." + "But received class_num(%s) < 0", + class_num)); + + if (gt_score.get_ptr()) { + auto dim_gtscore = gt_score->dims(); + PADDLE_ENFORCE_EQ( + dim_gtscore.size(), + 2, + phi::errors::InvalidArgument("Input(GTScore) should be a 2-D tensor" + "But received GTScore dimension(%s)", + dim_gtbox.size())); + PADDLE_ENFORCE_EQ( + dim_gtscore[0], + dim_gtbox[0], + phi::errors::InvalidArgument( + "Input(GTBox) and Input(GTScore) dim[0] should be same" + "But received GTBox dim[0](%s) != GTScore dim[0](%s)", + dim_gtbox[0], + dim_gtscore[0])); + PADDLE_ENFORCE_EQ( + dim_gtscore[1], + dim_gtbox[1], + phi::errors::InvalidArgument( + "Input(GTBox) and Input(GTScore) dim[1] should be same" + "But received GTBox dim[1](%s) != GTScore dim[1](%s)", + dim_gtscore[1], + dim_gtbox[1])); + } + + std::vector dim_out({dim_x[0]}); + loss->set_dims(phi::make_ddim(dim_out)); + loss->set_dtype(x.dtype()); + + std::vector dim_obj_mask({dim_x[0], mask_num, dim_x[2], dim_x[3]}); + objectness_mask->set_dims(phi::make_ddim(dim_obj_mask)); + objectness_mask->set_dtype(x.dtype()); + + std::vector dim_gt_match_mask({dim_gtbox[0], dim_gtbox[1]}); + gt_match_mask->set_dims(phi::make_ddim(dim_gt_match_mask)); + gt_match_mask->set_dtype(x.dtype()); +} + } // namespace phi PD_REGISTER_INFER_META_FN(batch_norm, phi::BatchNormInferMeta); diff --git a/paddle/phi/infermeta/multiary.h b/paddle/phi/infermeta/multiary.h index 6261d521e0e5b299d8caa6f4ae0ea73523107058..6abbf1c0ef478b26b40cbb60c7d2286d57421079 100644 --- a/paddle/phi/infermeta/multiary.h 
+++ b/paddle/phi/infermeta/multiary.h @@ -245,4 +245,19 @@ void WhereInferMeta(const MetaTensor& condition, const MetaTensor& y, MetaTensor* out); +void Yolov3LossInferMeta(const MetaTensor& x, + const MetaTensor& gt_box, + const MetaTensor& gt_label, + const paddle::optional gt_score, + const std::vector& anchors, + const std::vector& anchor_mask, + int class_num, + float ignore_thresh, + int downsample_ratio, + bool use_label_smooth, + float scale_x_y, + MetaTensor* loss, + MetaTensor* objectness_mask, + MetaTensor* gt_match_mask); + } // namespace phi diff --git a/paddle/phi/kernels/cpu/yolov3_loss_functor.h b/paddle/phi/kernels/cpu/yolov3_loss_functor.h new file mode 100644 index 0000000000000000000000000000000000000000..b96b3241a5aa8353bd9344ac9410e5b0f180d541 --- /dev/null +++ b/paddle/phi/kernels/cpu/yolov3_loss_functor.h @@ -0,0 +1,49 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
#pragma once

#include <cmath>  // std::exp, used by sigmoid(); keeps this header self-contained

namespace phi {

// Axis-aligned box described by its centre (x, y) and its size (w, h).
template <typename T>
struct Box {
  T x, y, w, h;
};

// Logistic function 1 / (1 + e^-x).
template <typename T>
static inline T sigmoid(T x) {
  return 1.0 / (1.0 + std::exp(-x));
}

// Reads one ground-truth box from a flat buffer laid out as
// [batch][max_boxes][4] with the four values stored as x, y, w, h.
//
// gt        - pointer to the first element of the buffer.
// batch     - index along the first dimension.
// max_boxes - extent of the second dimension.
// idx       - index of the box within `batch`.
template <typename T>
static inline Box<T> GetGtBox(const T* gt, int batch, int max_boxes, int idx) {
  const T* entry = gt + (batch * max_boxes + idx) * 4;
  Box<T> b;
  b.x = entry[0];
  b.y = entry[1];
  b.w = entry[2];
  b.h = entry[3];
  return b;
}

// Flat offset of element `entry` at spatial position `hw_idx` for anchor
// `an_idx` of sample `batch`:
//   (batch * an_num + an_idx) * an_stride + entry * stride + hw_idx
static inline int GetEntryIndex(int batch,
                                int an_idx,
                                int hw_idx,
                                int an_num,
                                int an_stride,
                                int stride,
                                int entry) {
  return (batch * an_num + an_idx) * an_stride + entry * stride + hw_idx;
}

}  // namespace phi
+ +#include +#include + +#include "paddle/phi/kernels/yolov3_loss_grad_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/cpu/yolov3_loss_functor.h" +#include "paddle/phi/kernels/funcs/math_function.h" + +namespace phi { + +template +static T SigmoidCrossEntropyGrad(T x, T label) { + return 1.0 / (1.0 + std::exp(-x)) - label; +} + +template +static T L1LossGrad(T x, T y) { + return x > y ? 1.0 : -1.0; +} + +template +static void CalcBoxLocationLossGrad(T* input_grad, + const T loss, + const T* input, + Box gt, + std::vector anchors, + int an_idx, + int box_idx, + int gi, + int gj, + int grid_size, + int input_size, + int stride, + T score) { + T tx = gt.x * grid_size - gi; + T ty = gt.y * grid_size - gj; + T tw = std::log(gt.w * input_size / anchors[2 * an_idx]); + T th = std::log(gt.h * input_size / anchors[2 * an_idx + 1]); + + T scale = (2.0 - gt.w * gt.h) * score; + input_grad[box_idx] = + SigmoidCrossEntropyGrad(input[box_idx], tx) * scale * loss; + input_grad[box_idx + stride] = + SigmoidCrossEntropyGrad(input[box_idx + stride], ty) * scale * loss; + input_grad[box_idx + 2 * stride] = + L1LossGrad(input[box_idx + 2 * stride], tw) * scale * loss; + input_grad[box_idx + 3 * stride] = + L1LossGrad(input[box_idx + 3 * stride], th) * scale * loss; +} + +template +static inline void CalcLabelLossGrad(T* input_grad, + const T loss, + const T* input, + const int index, + const int label, + const int class_num, + const int stride, + const T pos, + const T neg, + T score) { + for (int i = 0; i < class_num; i++) { + T pred = input[index + i * stride]; + input_grad[index + i * stride] = + SigmoidCrossEntropyGrad(pred, (i == label) ? 
pos : neg) * score * + loss; + } +} + +template +static inline void CalcObjnessLossGrad(T* input_grad, + const T* loss, + const T* input, + const T* objness, + const int n, + const int an_num, + const int h, + const int w, + const int stride, + const int an_stride) { + for (int i = 0; i < n; i++) { + for (int j = 0; j < an_num; j++) { + for (int k = 0; k < h; k++) { + for (int l = 0; l < w; l++) { + T obj = objness[k * w + l]; + if (obj > 1e-5) { + input_grad[k * w + l] = + SigmoidCrossEntropyGrad(input[k * w + l], 1.0) * obj * + loss[i]; + } else if (obj > -0.5) { + input_grad[k * w + l] = + SigmoidCrossEntropyGrad(input[k * w + l], 0.0) * loss[i]; + } + } + } + objness += stride; + input += an_stride; + input_grad += an_stride; + } + } +} + +template +void Yolov3LossGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& gt_box, + const DenseTensor& gt_label, + paddle::optional gt_score, + const DenseTensor& loss_grad, + const DenseTensor& objectness_mask, + const DenseTensor& gt_match_mask, + const std::vector& anchors, + const std::vector& anchor_mask, + int class_num, + float ignore_thresh, + int downsample_ratio, + bool use_label_smooth, + float scale_x_y, + DenseTensor* x_grad, + DenseTensor* gt_box_grad, + DenseTensor* gt_label_grad, + DenseTensor* gt_score_grad) { + auto* input = &x; + auto input_grad = x_grad; + auto* objness_mask = &objectness_mask; + + const int n = input_grad->dims()[0]; + const int c = input_grad->dims()[1]; + const int h = input_grad->dims()[2]; + const int w = input_grad->dims()[3]; + const int mask_num = anchor_mask.size(); + const int b = gt_match_mask.dims()[1]; + int input_size = downsample_ratio * h; + + const int stride = h * w; + const int an_stride = (class_num + 5) * stride; + + T label_pos = 1.0; + T label_neg = 0.0; + if (use_label_smooth) { + T smooth_weight = std::min(1.0 / static_cast(class_num), 1.0 / 40); + label_pos = 1.0 - smooth_weight; + label_neg = smooth_weight; + } + + const T* 
input_data = input->data(); + const T* gt_box_data = gt_box.data(); + const int* gt_label_data = gt_label.data(); + const T* loss_grad_data = loss_grad.data(); + const T* obj_mask_data = objness_mask->data(); + const int* gt_match_mask_data = gt_match_mask.data(); + input_grad->Resize({n, c, h, w}); + T* input_grad_data = dev_ctx.template Alloc(input_grad); + memset(input_grad_data, 0, input_grad->numel() * sizeof(T)); + + const T* gt_score_data; + DenseTensor gtscore; + if (!(gt_score.is_initialized())) { + gtscore.Resize({n, b}); + dev_ctx.template Alloc(>score); + phi::funcs::SetConstant()( + dev_ctx, >score, static_cast(1.0)); + gt_score_data = gtscore.data(); + } else { + gt_score_data = gt_score.get_ptr()->data(); + } + + for (int i = 0; i < n; i++) { + for (int t = 0; t < b; t++) { + int mask_idx = gt_match_mask_data[i * b + t]; + if (mask_idx >= 0) { + T score = gt_score_data[i * b + t]; + Box gt = GetGtBox(gt_box_data, i, b, t); + int gi = static_cast(gt.x * w); + int gj = static_cast(gt.y * h); + + int box_idx = GetEntryIndex( + i, mask_idx, gj * w + gi, mask_num, an_stride, stride, 0); + CalcBoxLocationLossGrad(input_grad_data, + loss_grad_data[i], + input_data, + gt, + anchors, + anchor_mask[mask_idx], + box_idx, + gi, + gj, + h, + input_size, + stride, + score); + + int label = gt_label_data[i * b + t]; + int label_idx = GetEntryIndex( + i, mask_idx, gj * w + gi, mask_num, an_stride, stride, 5); + CalcLabelLossGrad(input_grad_data, + loss_grad_data[i], + input_data, + label_idx, + label, + class_num, + stride, + label_pos, + label_neg, + score); + } + } + } + + CalcObjnessLossGrad(input_grad_data + 4 * stride, + loss_grad_data, + input_data + 4 * stride, + obj_mask_data, + n, + mask_num, + h, + w, + stride, + an_stride); +} + +} // namespace phi + +PD_REGISTER_KERNEL(yolov3_loss_grad, + CPU, + ALL_LAYOUT, + phi::Yolov3LossGradKernel, + float, + double) {} diff --git a/paddle/phi/kernels/cpu/yolov3_loss_kernel.cc 
b/paddle/phi/kernels/cpu/yolov3_loss_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..6df910eea02a9bbf1daeb344e4ee41c6204bc54e --- /dev/null +++ b/paddle/phi/kernels/cpu/yolov3_loss_kernel.cc @@ -0,0 +1,374 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "paddle/phi/kernels/yolov3_loss_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/cpu/yolov3_loss_functor.h" +#include "paddle/phi/kernels/funcs/math_function.h" + +namespace phi { + +template +static inline bool LessEqualZero(T x) { + return x < 1e-6; +} + +template +static T SigmoidCrossEntropy(T x, T label) { + return (x > 0 ? 
x : 0.0) - x * label + std::log(1.0 + std::exp(-std::abs(x))); +} + +template +static T L1Loss(T x, T y) { + return std::abs(y - x); +} + +static int GetMaskIndex(std::vector mask, int val) { + for (size_t i = 0; i < mask.size(); i++) { + if (mask[i] == val) { + return i; + } + } + return -1; +} + +template +static inline Box GetYoloBox(const T* x, + std::vector anchors, + int i, + int j, + int an_idx, + int grid_size, + int input_size, + int index, + int stride, + float scale, + float bias) { + Box b; + b.x = (i + sigmoid(x[index]) * scale + bias) / grid_size; + b.y = (j + sigmoid(x[index + stride]) * scale + bias) / grid_size; + b.w = std::exp(x[index + 2 * stride]) * anchors[2 * an_idx] / input_size; + b.h = std::exp(x[index + 3 * stride]) * anchors[2 * an_idx + 1] / input_size; + return b; +} + +template +static inline T BoxOverlap(T c1, T w1, T c2, T w2) { + T l1 = c1 - w1 / 2.0; + T l2 = c2 - w2 / 2.0; + T left = l1 > l2 ? l1 : l2; + T r1 = c1 + w1 / 2.0; + T r2 = c2 + w2 / 2.0; + T right = r1 < r2 ? r1 : r2; + return right - left; +} + +template +static inline T CalcBoxIoU(Box b1, Box b2) { + T w = BoxOverlap(b1.x, b1.w, b2.x, b2.w); + T h = BoxOverlap(b1.y, b1.h, b2.y, b2.h); + T inter_area = (w < 0 || h < 0) ? 
0.0 : w * h; + T union_area = b1.w * b1.h + b2.w * b2.h - inter_area; + return inter_area / union_area; +} + +template +static void CalcBoxLocationLoss(T* loss, + const T* input, + Box gt, + std::vector anchors, + int an_idx, + int box_idx, + int gi, + int gj, + int grid_size, + int input_size, + int stride, + T score) { + T tx = gt.x * grid_size - gi; + T ty = gt.y * grid_size - gj; + T tw = std::log(gt.w * input_size / anchors[2 * an_idx]); + T th = std::log(gt.h * input_size / anchors[2 * an_idx + 1]); + + T scale = (2.0 - gt.w * gt.h) * score; + loss[0] += SigmoidCrossEntropy(input[box_idx], tx) * scale; + loss[0] += SigmoidCrossEntropy(input[box_idx + stride], ty) * scale; + loss[0] += L1Loss(input[box_idx + 2 * stride], tw) * scale; + loss[0] += L1Loss(input[box_idx + 3 * stride], th) * scale; +} + +template +static inline void CalcLabelLoss(T* loss, + const T* input, + const int index, + const int label, + const int class_num, + const int stride, + const T pos, + const T neg, + T score) { + for (int i = 0; i < class_num; i++) { + T pred = input[index + i * stride]; + loss[0] += SigmoidCrossEntropy(pred, (i == label) ? 
pos : neg) * score; + } +} + +template +static inline void CalcObjnessLoss(T* loss, + const T* input, + const T* objness, + const int n, + const int an_num, + const int h, + const int w, + const int stride, + const int an_stride) { + for (int i = 0; i < n; i++) { + for (int j = 0; j < an_num; j++) { + for (int k = 0; k < h; k++) { + for (int l = 0; l < w; l++) { + T obj = objness[k * w + l]; + if (obj > 1e-5) { + // positive sample: obj = mixup score + loss[i] += SigmoidCrossEntropy(input[k * w + l], 1.0) * obj; + } else if (obj > -0.5) { + // negetive sample: obj = 0 + loss[i] += SigmoidCrossEntropy(input[k * w + l], 0.0); + } + } + } + objness += stride; + input += an_stride; + } + } +} + +template +static void inline GtValid(bool* valid, + const T* gtbox, + const int n, + const int b) { + for (int i = 0; i < n; i++) { + for (int j = 0; j < b; j++) { + if (LessEqualZero(gtbox[j * 4 + 2]) || LessEqualZero(gtbox[j * 4 + 3])) { + valid[j] = false; + } else { + valid[j] = true; + } + } + valid += b; + gtbox += b * 4; + } +} + +template +void Yolov3LossKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& gt_box, + const DenseTensor& gt_label, + paddle::optional gt_score, + const std::vector& anchors, + const std::vector& anchor_mask, + int class_num, + float ignore_thresh, + int downsample_ratio, + bool use_label_smooth, + float scale_x_y, + DenseTensor* loss, + DenseTensor* objectness_mask, + DenseTensor* gt_match_mask) { + auto* input = &x; + auto objness_mask = objectness_mask; + float scale = scale_x_y; + float bias = -0.5 * (scale - 1.); + + const int n = input->dims()[0]; + const int h = input->dims()[2]; + const int w = input->dims()[3]; + const int an_num = anchors.size() / 2; + const int mask_num = anchor_mask.size(); + const int b = gt_box.dims()[1]; + int input_size = downsample_ratio * h; + + const int stride = h * w; + const int an_stride = (class_num + 5) * stride; + + T label_pos = 1.0; + T label_neg = 0.0; + if (use_label_smooth) 
{ + T smooth_weight = std::min(1.0 / static_cast(class_num), 1.0 / 40); + label_pos = 1.0 - smooth_weight; + label_neg = smooth_weight; + } + + const T* input_data = input->data(); + const T* gt_box_data = gt_box.data(); + const int* gt_label_data = gt_label.data(); + loss->Resize({n}); + T* loss_data = dev_ctx.template Alloc(loss); + memset(loss_data, 0, loss->numel() * sizeof(T)); + objness_mask->Resize({n, mask_num, h, w}); + T* obj_mask_data = dev_ctx.template Alloc(objness_mask); + memset(obj_mask_data, 0, objness_mask->numel() * sizeof(T)); + gt_match_mask->Resize({n, b}); + int* gt_match_mask_data = dev_ctx.template Alloc(gt_match_mask); + + const T* gt_score_data; + DenseTensor gtscore; + if (!(gt_score.is_initialized())) { + gtscore.Resize({n, b}); + dev_ctx.template Alloc(>score); + phi::funcs::SetConstant()( + dev_ctx, >score, static_cast(1.0)); + gt_score_data = gtscore.data(); + } else { + gt_score_data = gt_score.get_ptr()->data(); + } + + // calc valid gt box mask, avoid calc duplicately in following code + DenseTensor gt_valid_mask; + gt_valid_mask.Resize({n, b}); + bool* gt_valid_mask_data = dev_ctx.template Alloc(>_valid_mask); + GtValid(gt_valid_mask_data, gt_box_data, n, b); + + for (int i = 0; i < n; i++) { + for (int j = 0; j < mask_num; j++) { + for (int k = 0; k < h; k++) { + for (int l = 0; l < w; l++) { + // each predict box find a best match gt box, if overlap is bigger + // then ignore_thresh, ignore the objectness loss. 
+ int box_idx = + GetEntryIndex(i, j, k * w + l, mask_num, an_stride, stride, 0); + Box pred = GetYoloBox(input_data, + anchors, + l, + k, + anchor_mask[j], + h, + input_size, + box_idx, + stride, + scale, + bias); + T best_iou = 0; + for (int t = 0; t < b; t++) { + if (!gt_valid_mask_data[i * b + t]) { + continue; + } + Box gt = GetGtBox(gt_box_data, i, b, t); + T iou = CalcBoxIoU(pred, gt); + if (iou > best_iou) { + best_iou = iou; + } + } + + // If best IoU is bigger then ignore_thresh, + // ignore the objectness loss. + if (best_iou > ignore_thresh) { + int obj_idx = (i * mask_num + j) * stride + k * w + l; + obj_mask_data[obj_idx] = static_cast(-1); + } + // all losses should be calculated if best IoU + // is bigger then truth thresh, but currently, + // truth thresh is an unreachable value as 1.0. + } + } + } + for (int t = 0; t < b; t++) { + if (!gt_valid_mask_data[i * b + t]) { + gt_match_mask_data[i * b + t] = -1; + continue; + } + Box gt = GetGtBox(gt_box_data, i, b, t); + int gi = static_cast(gt.x * w); + int gj = static_cast(gt.y * h); + Box gt_shift = gt; + gt_shift.x = 0.0; + gt_shift.y = 0.0; + T best_iou = 0.0; + int best_n = 0; + // each gt box find a best match anchor box as positive sample, + // for positive sample, all losses should be calculated, and for + // other samples, only objectness loss is required. 
+ for (int an_idx = 0; an_idx < an_num; an_idx++) { + Box an_box; + an_box.x = 0.0; + an_box.y = 0.0; + an_box.w = anchors[2 * an_idx] / static_cast(input_size); + an_box.h = anchors[2 * an_idx + 1] / static_cast(input_size); + float iou = CalcBoxIoU(an_box, gt_shift); + if (iou > best_iou) { + best_iou = iou; + best_n = an_idx; + } + } + + int mask_idx = GetMaskIndex(anchor_mask, best_n); + gt_match_mask_data[i * b + t] = mask_idx; + if (mask_idx >= 0) { + T score = gt_score_data[i * b + t]; + int box_idx = GetEntryIndex( + i, mask_idx, gj * w + gi, mask_num, an_stride, stride, 0); + CalcBoxLocationLoss(loss_data + i, + input_data, + gt, + anchors, + best_n, + box_idx, + gi, + gj, + h, + input_size, + stride, + score); + + int obj_idx = (i * mask_num + mask_idx) * stride + gj * w + gi; + obj_mask_data[obj_idx] = score; + + int label = gt_label_data[i * b + t]; + int label_idx = GetEntryIndex( + i, mask_idx, gj * w + gi, mask_num, an_stride, stride, 5); + CalcLabelLoss(loss_data + i, + input_data, + label_idx, + label, + class_num, + stride, + label_pos, + label_neg, + score); + } + } + } + + CalcObjnessLoss(loss_data, + input_data + 4 * stride, + obj_mask_data, + n, + mask_num, + h, + w, + stride, + an_stride); +} + +} // namespace phi + +PD_REGISTER_KERNEL( + yolov3_loss, CPU, ALL_LAYOUT, phi::Yolov3LossKernel, float, double) {} diff --git a/paddle/phi/kernels/yolov3_loss_grad_kernel.h b/paddle/phi/kernels/yolov3_loss_grad_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..789e782443f68b6c2a87d4716bbc7f0169823000 --- /dev/null +++ b/paddle/phi/kernels/yolov3_loss_grad_kernel.h @@ -0,0 +1,42 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" + +namespace phi { + +template +void Yolov3LossGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& gt_box, + const DenseTensor& gt_label, + paddle::optional gt_score, + const DenseTensor& loss_grad, + const DenseTensor& objectness_mask, + const DenseTensor& gt_match_mask, + const std::vector& anchors, + const std::vector& anchor_mask, + int class_num, + float ignore_thresh, + int downsample_ratio, + bool use_label_smooth, + float scale_x_Y, + DenseTensor* x_grad, + DenseTensor* gt_box_grad, + DenseTensor* gt_label_grad, + DenseTensor* gt_score_grad); + +} // namespace phi diff --git a/paddle/phi/kernels/yolov3_loss_kernel.h b/paddle/phi/kernels/yolov3_loss_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..eb6668000dee09e89ac62d904f036cf8fe431c75 --- /dev/null +++ b/paddle/phi/kernels/yolov3_loss_kernel.h @@ -0,0 +1,38 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" + +namespace phi { + +template +void Yolov3LossKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& gt_box, + const DenseTensor& gt_label, + paddle::optional gt_score, + const std::vector& anchors, + const std::vector& anchor_mask, + int class_num, + float ignore_thresh, + int downsample_ratio, + bool use_label_smooth, + float scale_x_Y, + DenseTensor* loss, + DenseTensor* objectness_mask, + DenseTensor* gt_match_mask); + +} // namespace phi diff --git a/paddle/phi/ops/compat/yolov3_loss_sig.cc b/paddle/phi/ops/compat/yolov3_loss_sig.cc new file mode 100644 index 0000000000000000000000000000000000000000..bbdadfa93ba9636daefc27fe69de6f057d3a9931 --- /dev/null +++ b/paddle/phi/ops/compat/yolov3_loss_sig.cc @@ -0,0 +1,58 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/core/compat/op_utils.h" + +namespace phi { + +KernelSignature Yolov3LossOpArgumentMapping(const ArgumentMappingContext& ctx) { + return KernelSignature("yolov3_loss", + {"X", "GTBox", "GTLabel", "GTScore"}, + {"anchors", + "anchor_mask", + "class_num", + "ignore_thresh", + "downsample_ratio", + "use_label_smooth", + "scale_x_y"}, + {"Loss", "ObjectnessMask", "GTMatchMask"}); +} + +KernelSignature Yolov3LossGradOpArgumentMapping( + const ArgumentMappingContext& ctx) { + return KernelSignature("yolov3_loss_grad", + {"X", + "GTBox", + "GTLabel", + "GTScore", + GradVarName("Loss"), + "ObjectnessMask", + "GTMatchMask"}, + {"anchors", + "anchor_mask", + "class_num", + "ignore_thresh", + "downsample_ratio", + "use_label_smooth", + "scale_x_y"}, + {GradVarName("X"), + GradVarName("GTBox"), + GradVarName("GTLabel"), + GradVarName("GTScore")}); +} +} // namespace phi + +PD_REGISTER_ARG_MAPPING_FN(yolov3_loss, phi::Yolov3LossOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(yolov3_loss_grad, + phi::Yolov3LossGradOpArgumentMapping);