From 5d0b568ecb58d479619c5a2295d65b7f677d4648 Mon Sep 17 00:00:00 2001
From: dengkaipeng
Date: Tue, 6 Nov 2018 18:42:19 +0800
Subject: [PATCH] Add YOLOv3 loss operator. test=develop

---
Note: this copy was rebuilt from a whitespace-collapsed export that had lost
its line breaks and everything between angle brackets. Template arguments
were restored from usage and several defects were fixed, so the blob hashes
on the "index" lines no longer match the file contents.

 paddle/fluid/operators/yolov3_loss_op.cc | 146 ++++++++
 paddle/fluid/operators/yolov3_loss_op.cu |  26 ++
 paddle/fluid/operators/yolov3_loss_op.h  | 413 +++++++++++++++++++++++
 3 files changed, 585 insertions(+)
 create mode 100644 paddle/fluid/operators/yolov3_loss_op.cc
 create mode 100644 paddle/fluid/operators/yolov3_loss_op.cu
 create mode 100644 paddle/fluid/operators/yolov3_loss_op.h

diff --git a/paddle/fluid/operators/yolov3_loss_op.cc b/paddle/fluid/operators/yolov3_loss_op.cc
new file mode 100644
index 0000000000..b4c6a185e2
--- /dev/null
+++ b/paddle/fluid/operators/yolov3_loss_op.cc
@@ -0,0 +1,146 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+   http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "paddle/fluid/operators/yolov3_loss_op.h"
+#include "paddle/fluid/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+using framework::Tensor;
+
+// Forward operator: validates the inputs and attributes and infers the shape
+// of Out.
+class Yolov3LossOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  // Checks X, GTBox and the attributes, then sets Out to shape [N, 1], where N
+  // is dim 0 of X.
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of Yolov3LossOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("GTBox"),
+                   "Input(GTBox) of Yolov3LossOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Out"),
+                   "Output(Out) of Yolov3LossOp should not be null.");
+
+    auto dim_x = ctx->GetInputDim("X");
+    auto dim_gt = ctx->GetInputDim("GTBox");
+    auto img_height = ctx->Attrs().Get<int>("img_height");
+    auto anchors = ctx->Attrs().Get<std::vector<int>>("anchors");
+    auto box_num = ctx->Attrs().Get<int>("box_num");
+    auto class_num = ctx->Attrs().Get<int>("class_num");
+    // Signed copy of the anchor list length, so the checks below do not mix
+    // signed and unsigned operands.
+    const int anchor_len = static_cast<int>(anchors.size());
+
+    // dim_x[1] is read below, so the rank of X has to be validated first.
+    PADDLE_ENFORCE_EQ(dim_x.size(), 4, "Input(X) should be a 4-D tensor.");
+    PADDLE_ENFORCE_GT(img_height, 0,
+                      "Attr(img_height) value should be greater then 0");
+    PADDLE_ENFORCE_GT(anchor_len, 0,
+                      "Attr(anchors) length should be greater then 0.");
+    PADDLE_ENFORCE_EQ(anchor_len % 2, 0,
+                      "Attr(anchors) length should be even integer.");
+    PADDLE_ENFORCE_GT(box_num, 0,
+                      "Attr(box_num) should be an integer greater then 0.");
+    PADDLE_ENFORCE_GT(class_num, 0,
+                      "Attr(class_num) should be an integer greater then 0.");
+    PADDLE_ENFORCE_EQ(dim_x[1], anchor_len / 2 * (5 + class_num),
+                      "Input(X) dim[1] should be equal to (anchor_number * (5 "
+                      "+ class_num)).");
+    PADDLE_ENFORCE_EQ(dim_gt.size(), 3, "Input(GTBox) should be a 3-D tensor");
+    PADDLE_ENFORCE_EQ(dim_gt[2], 5, "Input(GTBox) dim[2] should be 5");
+
+    std::vector<int64_t> dim_out({dim_x[0], 1});
+    ctx->SetOutputDim("Out", framework::make_ddim(dim_out));
+  }
+
+  // Selects the kernel by the data type of X and the current place.
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(
+        framework::ToDataType(ctx.Input<Tensor>("X")->type()), ctx.GetPlace());
+  }
+};
+
+// Declares the inputs, output and attributes read by InferShape and by the
+// kernels.
+class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("X",
+             "The input tensor of YOLOv3 loss operator, "
+             "This is a 4-D tensor with shape of [N, C, H, W]");
+    AddInput("GTBox",
+             "The input tensor of ground truth boxes, "
+             "This is a 3-D tensor with shape of [N, B, 5], each box is "
+             "stored as [label, x, y, w, h]");
+    AddOutput("Out",
+              "The output yolo loss tensor, "
+              "This is a 2-D tensor with shape of [N, 1]");
+
+    AddAttr<int>("box_num", "The number of boxes generated in each grid.");
+    AddAttr<int>("class_num", "The number of classes to predict.");
+    AddAttr<int>("img_height",
+                 "The height of the input image, it is divided by the height "
+                 "of Input(X) to get the stride.");
+    AddAttr<std::vector<int>>("anchors",
+                              "The anchor sizes, stored as a flat list of "
+                              "(width, height) pairs.");
+    AddAttr<float>("ignore_thresh",
+                   "Anchors whose IoU with a ground truth box is greater "
+                   "than this value are excluded from the negative mask.");
+    AddComment(R"DOC(
+         This operator generate yolov3 loss by given predict result and ground
+         truth boxes.
+         )DOC");
+  }
+};
+
+// Backward operator: X@GRAD takes the shape of X.
+class Yolov3LossOpGrad : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
+    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
+                   "Input(Out@GRAD) should not be null");
+    auto dim_x = ctx->GetInputDim("X");
+    if (ctx->HasOutput(framework::GradVarName("X"))) {
+      ctx->SetOutputDim(framework::GradVarName("X"), dim_x);
+    }
+  }
+
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(
+        framework::ToDataType(ctx.Input<Tensor>("X")->type()), ctx.GetPlace());
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OPERATOR(yolov3_loss, ops::Yolov3LossOp, ops::Yolov3LossOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(yolov3_loss_grad, ops::Yolov3LossOpGrad);
+REGISTER_OP_CPU_KERNEL(
+    yolov3_loss,
+    ops::Yolov3LossKernel<paddle::platform::CPUDeviceContext, float>);
+REGISTER_OP_CPU_KERNEL(
+    yolov3_loss_grad,
+    ops::Yolov3LossGradKernel<paddle::platform::CPUDeviceContext, float>);
diff --git a/paddle/fluid/operators/yolov3_loss_op.cu b/paddle/fluid/operators/yolov3_loss_op.cu
new file mode 100644
index 0000000000..48f997456a
--- /dev/null
+++ b/paddle/fluid/operators/yolov3_loss_op.cu
@@ -0,0 +1,26 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+   http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#define EIGEN_USE_GPU
+
+#include "paddle/fluid/operators/yolov3_loss_op.h"
+#include "paddle/fluid/platform/cuda_primitives.h"
+
+// NOTE(review): the kernel bodies in yolov3_loss_op.h index tensor elements
+// from host-side loops; confirm they can run on a CUDA place before relying
+// on this registration.
+namespace ops = paddle::operators;
+REGISTER_OP_CUDA_KERNEL(
+    yolov3_loss,
+    ops::Yolov3LossKernel<paddle::platform::CUDADeviceContext, float>);
+REGISTER_OP_CUDA_KERNEL(
+    yolov3_loss_grad,
+    ops::Yolov3LossGradKernel<paddle::platform::CUDADeviceContext, float>);
diff --git a/paddle/fluid/operators/yolov3_loss_op.h b/paddle/fluid/operators/yolov3_loss_op.h
new file mode 100644
index 0000000000..7950390567
--- /dev/null
+++ b/paddle/fluid/operators/yolov3_loss_op.h
@@ -0,0 +1,413 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+   http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#pragma once
+#include <algorithm>
+#include <cmath>
+#include <vector>
+#include "paddle/fluid/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+template <typename T, size_t D, int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>;
+template <typename T, int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
+
+using Array2 = Eigen::DSizes<int64_t, 2>;
+using Array4 = Eigen::DSizes<int64_t, 4>;
+
+// Returns true when the magnitude of x is below 1e-6.
+template <typename T>
+static inline bool isZero(T x) {
+  // std::fabs always takes the floating-point overload; an unqualified abs()
+  // may bind to the C integer version and truncate x.
+  return std::fabs(x) < 1e-6;
+}
+
+// Logistic function 1 / (1 + exp(-x)).
+template <typename T>
+static inline T sigmod(T x) {
+  return 1.0 / (std::exp(-1.0 * x) + 1.0);
+}
+
+// Returns the sum over all elements of ((x - y) * mask)^2.
+// y and mask must hold at least as many elements as x.
+template <typename T>
+static inline T CalcMSEWithMask(const Tensor& x, const Tensor& y,
+                                const Tensor& mask) {
+  const T* x_data = x.data<T>();
+  const T* y_data = y.data<T>();
+  const T* mask_data = mask.data<T>();
+
+  T error_sum = 0.0;
+  for (int64_t i = 0; i < x.numel(); i++) {
+    const T diff = (x_data[i] - y_data[i]) * mask_data[i];
+    error_sum += diff * diff;
+  }
+  return error_sum;
+}
+
+// Returns the mask-weighted binary cross entropy
+//   -sum((y * log(x) + (1 - y) * log(1 - x)) * mask)
+// taken over the elements whose mask is non-zero.
+template <typename T>
+static inline T CalcBCEWithMask(const Tensor& x, const Tensor& y,
+                                const Tensor& mask) {
+  const T* x_data = x.data<T>();
+  const T* y_data = y.data<T>();
+  const T* mask_data = mask.data<T>();
+
+  T loss_sum = 0.0;
+  for (int64_t i = 0; i < x.numel(); i++) {
+    // Skip masked-out elements so that log(0) * 0 cannot produce NaN.
+    if (isZero<T>(mask_data[i])) continue;
+    loss_sum -= (y_data[i] * std::log(x_data[i]) +
+                 (1.0 - y_data[i]) * std::log(1.0 - x_data[i])) *
+                mask_data[i];
+  }
+  return loss_sum;
+}
+
+// Placeholder for the class cross entropy term, which is not implemented yet.
+template <typename T>
+static inline T CalcCEWithMask(const Tensor& x, const Tensor& y,
+                               const Tensor& mask) {
+  // Fail explicitly: flowing off the end of a non-void function is undefined
+  // behaviour.
+  PADDLE_THROW("CalcCEWithMask is not implemented.");
+  return static_cast<T>(0);
+}
+
+// Decodes the raw network output into per-anchor predictions.
+//
+// For each anchor, input holds (5 + class_num) consecutive channels in the
+// order x, y, w, h, confidence, class scores. For sample i, anchor a and grid
+// cell (row j, column k) this writes:
+//   pred_x, pred_y   sigmoid of the x / y channel
+//   pred_w, pred_h   the raw w / h channel
+//   pred_boxes       [pred_x + k, pred_y + j,
+//                     exp(pred_w) * anchor_w / stride,
+//                     exp(pred_h) * anchor_h / stride]
+//   pred_confs       sigmoid of the confidence channel
+//   pred_classes     sigmoid of every class channel
+template <typename T>
+static void CalcPredResult(const Tensor& input, Tensor* pred_boxes,
+                           Tensor* pred_confs, Tensor* pred_classes,
+                           Tensor* pred_x, Tensor* pred_y, Tensor* pred_w,
+                           Tensor* pred_h, const std::vector<int>& anchors,
+                           const int class_num, const float stride) {
+  const int n = input.dims()[0];
+  const int h = input.dims()[2];
+  const int w = input.dims()[3];
+  const int anchor_num = anchors.size() / 2;
+  const int box_attr_num = 5 + class_num;
+
+  auto input_t = EigenTensor<T, 4>::From(input);
+  auto pred_boxes_t = EigenTensor<T, 5>::From(*pred_boxes);
+  auto pred_confs_t = EigenTensor<T, 4>::From(*pred_confs);
+  auto pred_classes_t = EigenTensor<T, 5>::From(*pred_classes);
+  auto pred_x_t = EigenTensor<T, 4>::From(*pred_x);
+  auto pred_y_t = EigenTensor<T, 4>::From(*pred_y);
+  auto pred_w_t = EigenTensor<T, 4>::From(*pred_w);
+  auto pred_h_t = EigenTensor<T, 4>::From(*pred_h);
+
+  for (int i = 0; i < n; i++) {
+    for (int an_idx = 0; an_idx < anchor_num; an_idx++) {
+      // stride is a float so that this is a floating-point division; with an
+      // integer stride every anchor smaller than the stride would become 0.
+      const float an_w = anchors[an_idx * 2] / stride;
+      const float an_h = anchors[an_idx * 2 + 1] / stride;
+      // First channel of this anchor in input.
+      const int base = box_attr_num * an_idx;
+
+      for (int j = 0; j < h; j++) {
+        for (int k = 0; k < w; k++) {
+          pred_x_t(i, an_idx, j, k) = sigmod<T>(input_t(i, base, j, k));
+          pred_y_t(i, an_idx, j, k) = sigmod<T>(input_t(i, base + 1, j, k));
+          // w and h stay unbounded: they are the exponent applied to the
+          // anchor size below and are regressed against log(gt / anchor).
+          pred_w_t(i, an_idx, j, k) = input_t(i, base + 2, j, k);
+          pred_h_t(i, an_idx, j, k) = input_t(i, base + 3, j, k);
+
+          pred_boxes_t(i, an_idx, j, k, 0) = pred_x_t(i, an_idx, j, k) + k;
+          pred_boxes_t(i, an_idx, j, k, 1) = pred_y_t(i, an_idx, j, k) + j;
+          pred_boxes_t(i, an_idx, j, k, 2) =
+              std::exp(pred_w_t(i, an_idx, j, k)) * an_w;
+          pred_boxes_t(i, an_idx, j, k, 3) =
+              std::exp(pred_h_t(i, an_idx, j, k)) * an_h;
+
+          pred_confs_t(i, an_idx, j, k) =
+              sigmod<T>(input_t(i, base + 4, j, k));
+
+          for (int c = 0; c < class_num; c++) {
+            pred_classes_t(i, an_idx, j, k, c) =
+                sigmod<T>(input_t(i, base + 5 + c, j, k));
+          }
+        }
+      }
+    }
+  }
+}
+
+// Returns the intersection over union of box1 and box2.
+//   center_mode == true:  boxes are [center_x, center_y, width, height].
+//   center_mode == false: boxes are [x1, y1, x2, y2] corner coordinates; this
+//                         is the layout of the {0, 0, w, h} shapes passed by
+//                         CalcPredBoxWithGTBox.
+// Widths and heights are computed with an inclusive "+ 1.0" extent.
+template <typename T>
+static T CalcBoxIoU(const std::vector<T>& box1, const std::vector<T>& box2,
+                    bool center_mode) {
+  T b1_x1, b1_x2, b1_y1, b1_y2;
+  T b2_x1, b2_x2, b2_y1, b2_y2;
+  if (center_mode) {
+    b1_x1 = box1[0] - box1[2] / 2;
+    b1_x2 = box1[0] + box1[2] / 2;
+    b1_y1 = box1[1] - box1[3] / 2;
+    b1_y2 = box1[1] + box1[3] / 2;
+    b2_x1 = box2[0] - box2[2] / 2;
+    b2_x2 = box2[0] + box2[2] / 2;
+    b2_y1 = box2[1] - box2[3] / 2;
+    b2_y2 = box2[1] + box2[3] / 2;
+  } else {
+    b1_x1 = box1[0];
+    b1_y1 = box1[1];
+    b1_x2 = box1[2];
+    b1_y2 = box1[3];
+    b2_x1 = box2[0];
+    b2_y1 = box2[1];
+    b2_x2 = box2[2];
+    b2_y2 = box2[3];
+  }
+  T b1_area = (b1_x2 - b1_x1 + 1.0) * (b1_y2 - b1_y1 + 1.0);
+  T b2_area = (b2_x2 - b2_x1 + 1.0) * (b2_y2 - b2_y1 + 1.0);
+
+  T inter_rect_x1 = std::max(b1_x1, b2_x1);
+  T inter_rect_y1 = std::max(b1_y1, b2_y1);
+  T inter_rect_x2 = std::min(b1_x2, b2_x2);
+  T inter_rect_y2 = std::min(b1_y2, b2_y2);
+  T inter_area = std::max(inter_rect_x2 - inter_rect_x1 + 1.0, 0.0) *
+                 std::max(inter_rect_y2 - inter_rect_y1 + 1.0, 0.0);
+
+  return inter_area / (b1_area + b2_area - inter_area + 1e-16);
+}
+
+// Returns the index of the highest class score stored in pred_classes at
+// (n, best_an_index, gj, gi), or -1 when no score is greater than 0.
+template <typename T>
+static inline int GetPredLabel(const Tensor& pred_classes, int n,
+                               int best_an_index, int gj, int gi) {
+  auto pred_classes_t = EigenTensor<T, 5>::From(pred_classes);
+  T score = 0.0;
+  int label = -1;
+  for (int i = 0; i < pred_classes.dims()[4]; i++) {
+    if (pred_classes_t(n, best_an_index, gj, gi, i) > score) {
+      score = pred_classes_t(n, best_an_index, gj, gi, i);
+      label = i;
+    }
+  }
+  return label;
+}
+
+// Builds the training targets and masks from the ground-truth boxes.
+//
+// Each row of gt_boxes is [label, x, y, w, h]; a row whose four box fields are
+// all zero is skipped. For every other box, the anchor whose shape has the
+// highest IoU with the box is selected at cell (gj, gi) = (int(y), int(x)):
+// mask_true, tconf and the tclass entry of the label are set to 1 there, and
+// tx / ty / tw / th receive the regression targets. mask_false starts at 1, is
+// cleared for anchors whose shape IoU exceeds ignore_thresh, and finally has
+// mask_true subtracted so that selected anchors are not counted as negatives.
+// gt_num receives the number of boxes processed and correct_num the number
+// whose prediction has IoU > 0.5, confidence > 0.5 and the same label.
+// img_height is currently unused.
+template <typename T>
+static void CalcPredBoxWithGTBox(
+    const Tensor& pred_boxes, const Tensor& pred_confs,
+    const Tensor& pred_classes, const Tensor& gt_boxes,
+    const std::vector<int>& anchors, const float ignore_thresh,
+    const int img_height, int* gt_num, int* correct_num, Tensor* mask_true,
+    Tensor* mask_false, Tensor* tx, Tensor* ty, Tensor* tw, Tensor* th,
+    Tensor* tconf, Tensor* tclass) {
+  const int n = gt_boxes.dims()[0];
+  const int b = gt_boxes.dims()[1];
+  const int grid_h = pred_boxes.dims()[2];
+  const int grid_w = pred_boxes.dims()[3];
+  const int class_num = pred_classes.dims()[4];
+  const int anchor_num = anchors.size() / 2;
+  auto pred_boxes_t = EigenTensor<T, 5>::From(pred_boxes);
+  auto pred_confs_t = EigenTensor<T, 4>::From(pred_confs);
+  auto gt_boxes_t = EigenTensor<T, 3>::From(gt_boxes);
+  auto mask_true_t = EigenTensor<T, 4>::From(*mask_true).setConstant(0.0);
+  auto mask_false_t = EigenTensor<T, 4>::From(*mask_false).setConstant(1.0);
+  auto tx_t = EigenTensor<T, 4>::From(*tx).setConstant(0.0);
+  auto ty_t = EigenTensor<T, 4>::From(*ty).setConstant(0.0);
+  auto tw_t = EigenTensor<T, 4>::From(*tw).setConstant(0.0);
+  auto th_t = EigenTensor<T, 4>::From(*th).setConstant(0.0);
+  auto tconf_t = EigenTensor<T, 4>::From(*tconf).setConstant(0.0);
+  auto tclass_t = EigenTensor<T, 5>::From(*tclass).setConstant(0.0);
+
+  *gt_num = 0;
+  *correct_num = 0;
+  for (int i = 0; i < n; i++) {
+    for (int j = 0; j < b; j++) {
+      // Test the box fields (columns 1..4); column 0 is the class label.
+      if (isZero<T>(gt_boxes_t(i, j, 1)) && isZero<T>(gt_boxes_t(i, j, 2)) &&
+          isZero<T>(gt_boxes_t(i, j, 3)) && isZero<T>(gt_boxes_t(i, j, 4))) {
+        continue;
+      }
+
+      // Increment the counter itself, not the pointer.
+      (*gt_num)++;
+      int gt_label = gt_boxes_t(i, j, 0);
+      T gx = gt_boxes_t(i, j, 1);
+      T gy = gt_boxes_t(i, j, 2);
+      T gw = gt_boxes_t(i, j, 3);
+      T gh = gt_boxes_t(i, j, 4);
+      int gi = static_cast<int>(gx);
+      int gj = static_cast<int>(gy);
+      // gi, gj and gt_label index the tensors below, so reject values that
+      // would write out of bounds.
+      PADDLE_ENFORCE(gi >= 0 && gi < grid_w && gj >= 0 && gj < grid_h,
+                     "GTBox center should lie inside the prediction grid.");
+      PADDLE_ENFORCE(gt_label >= 0 && gt_label < class_num,
+                     "GTBox label should be in range [0, class_num).");
+
+      T max_iou = static_cast<T>(-1);
+      T iou;
+      int best_an_index = -1;
+      std::vector<T> gt_box({0, 0, gw, gh});
+      for (int an_idx = 0; an_idx < anchor_num; an_idx++) {
+        std::vector<T> anchor_shape({0, 0, static_cast<T>(anchors[2 * an_idx]),
+                                     static_cast<T>(anchors[2 * an_idx + 1])});
+        iou = CalcBoxIoU<T>(gt_box, anchor_shape, false);
+        if (iou > max_iou) {
+          max_iou = iou;
+          best_an_index = an_idx;
+        }
+        if (iou > ignore_thresh) {
+          mask_false_t(i, an_idx, gj, gi) = 0;
+        }
+      }
+      // The first dimension of every mask / target tensor is the sample index
+      // i (b is the number of boxes per sample, not an index).
+      mask_true_t(i, best_an_index, gj, gi) = 1;
+      mask_false_t(i, best_an_index, gj, gi) = 1;
+      tx_t(i, best_an_index, gj, gi) = gx - gi;
+      ty_t(i, best_an_index, gj, gi) = gy - gj;
+      // NOTE(review): CalcPredResult divides the anchors by the stride while
+      // the raw anchor values are used here and in the IoU above; confirm the
+      // units expected for GTBox.
+      tw_t(i, best_an_index, gj, gi) =
+          std::log(gw / anchors[2 * best_an_index] + 1e-16);
+      th_t(i, best_an_index, gj, gi) =
+          std::log(gh / anchors[2 * best_an_index + 1] + 1e-16);
+      tclass_t(i, best_an_index, gj, gi, gt_label) = 1;
+      tconf_t(i, best_an_index, gj, gi) = 1;
+
+      std::vector<T> pred_box({
+          pred_boxes_t(i, best_an_index, gj, gi, 0),
+          pred_boxes_t(i, best_an_index, gj, gi, 1),
+          pred_boxes_t(i, best_an_index, gj, gi, 2),
+          pred_boxes_t(i, best_an_index, gj, gi, 3),
+      });
+      gt_box[0] = gx;
+      gt_box[1] = gy;
+      iou = CalcBoxIoU<T>(gt_box, pred_box, true);
+      int pred_label = GetPredLabel<T>(pred_classes, i, best_an_index, gj, gi);
+      T score = pred_confs_t(i, best_an_index, gj, gi);
+      if (iou > 0.5 && pred_label == gt_label && score > 0.5) {
+        (*correct_num)++;
+      }
+    }
+  }
+  // Drop the selected anchors from the negative mask.
+  mask_false_t = mask_false_t - mask_true_t;
+}
+
+// Forward kernel: decodes X, builds the targets from GTBox and evaluates the
+// coordinate and confidence loss terms.
+template <typename DeviceContext, typename T>
+class Yolov3LossKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* input = ctx.Input<Tensor>("X");
+    auto* gt_boxes = ctx.Input<Tensor>("GTBox");
+    auto* output = ctx.Output<Tensor>("Out");
+    int img_height = ctx.Attr<int>("img_height");
+    auto anchors = ctx.Attr<std::vector<int>>("anchors");
+    int class_num = ctx.Attr<int>("class_num");
+    float ignore_thresh = ctx.Attr<float>("ignore_thresh");
+
+    const int n = input->dims()[0];
+    const int h = input->dims()[2];
+    const int w = input->dims()[3];
+    const int an_num = anchors.size() / 2;
+    const float stride = static_cast<float>(img_height) / h;
+
+    Tensor pred_x, pred_y, pred_w, pred_h;
+    Tensor pred_boxes, pred_confs, pred_classes;
+    pred_x.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
+    pred_y.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
+    pred_w.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
+    pred_h.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
+    pred_boxes.mutable_data<T>({n, an_num, h, w, 4}, ctx.GetPlace());
+    pred_confs.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
+    pred_classes.mutable_data<T>({n, an_num, h, w, class_num}, ctx.GetPlace());
+    CalcPredResult<T>(*input, &pred_boxes, &pred_confs, &pred_classes, &pred_x,
+                      &pred_y, &pred_w, &pred_h, anchors, class_num, stride);
+
+    Tensor mask_true, mask_false;
+    Tensor tx, ty, tw, th, tconf, tclass;
+    mask_true.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
+    mask_false.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
+    tx.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
+    ty.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
+    tw.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
+    th.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
+    tconf.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
+    tclass.mutable_data<T>({n, an_num, h, w, class_num}, ctx.GetPlace());
+    int gt_num = 0;
+    int correct_num = 0;
+    CalcPredBoxWithGTBox<T>(pred_boxes, pred_confs, pred_classes, *gt_boxes,
+                            anchors, ignore_thresh, img_height, &gt_num,
+                            &correct_num, &mask_true, &mask_false, &tx, &ty,
+                            &tw, &th, &tconf, &tclass);
+
+    T loss_x = CalcMSEWithMask<T>(pred_x, tx, mask_true);
+    T loss_y = CalcMSEWithMask<T>(pred_y, ty, mask_true);
+    T loss_w = CalcMSEWithMask<T>(pred_w, tw, mask_true);
+    T loss_h = CalcMSEWithMask<T>(pred_h, th, mask_true);
+    T loss_conf_true = CalcBCEWithMask<T>(pred_confs, tconf, mask_true);
+    T loss_conf_false = CalcBCEWithMask<T>(pred_confs, tconf, mask_false);
+    // T loss_class = CalcCEWithMask()
+    // TODO: the loss terms above are not combined or written to Out yet.
+  }
+};
+
+// Backward kernel. It only fetches X@GRAD and Out@GRAD; no gradient is
+// computed yet.
+template <typename DeviceContext, typename T>
+class Yolov3LossGradKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* d_input_t = ctx.Output<Tensor>(framework::GradVarName("X"));
+    auto* d_output_t = ctx.Input<Tensor>(framework::GradVarName("Out"));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
-- 
GitLab