Commit 1f9400dd authored by z37757

add drrg

Parent 6a8a0eeb
Global:
use_gpu: true
epoch_num: 1200
log_smooth_window: 20
print_batch_step: 5
save_model_dir: ./output/det_r50_drrg_ctw/
save_epoch_step: 100
# evaluation is run every 1260 iterations
eval_batch_step: [37800, 1260]
cal_metric_during_train: False
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img: doc/imgs_en/img_10.jpg
save_res_path: ./output/det_drrg/predicts_drrg.txt
Architecture:
model_type: det
algorithm: DRRG
Transform:
Backbone:
name: ResNet_vd
layers: 50
pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained.pdparams
Neck:
name: FPN_UNet
in_channels: [256, 512, 1024, 2048]
out_channels: 32
Head:
name: DRRGHead
in_channels: 32
text_region_thr: 0.3
center_region_thr: 0.4
Loss:
name: DRRGLoss
Optimizer:
name: Momentum
momentum: 0.9
lr:
name: DecayLearningRate
learning_rate: 0.028
epochs: 1200
factor: 0.9
end_lr: 0.0000001
weight_decay: 0.0001
PostProcess:
name: DRRGPostprocess
link_thr: 0.8
Metric:
name: DetFCEMetric
main_indicator: hmean
Train:
dataset:
name: SimpleDataSet
data_dir: ./train_data/ctw1500/imgs/
label_file_list:
- ./train_data/ctw1500/imgs/training.txt
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
ignore_orientation: True
- DetLabelEncode: # Class handling label
- ColorJitter:
brightness: 0.12549019607843137
saturation: 0.5
- RandomScaling:
- RandomCropFlip:
crop_ratio: 0.5
- RandomCropPolyInstances:
crop_ratio: 0.8
min_side_ratio: 0.3
- RandomRotatePolyInstances:
rotate_ratio: 0.5
max_angle: 60
pad_with_fixed_color: False
- SquareResizePad:
target_size: 800
pad_ratio: 0.6
- IaaAugment:
augmenter_args:
- { 'type': Fliplr, 'args': { 'p': 0.5 } }
- DRRGTargets:
- NormalizeImage:
scale: 1./255.
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: 'hwc'
- ToCHWImage:
- KeepKeys:
keep_keys: ['image', 'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
'gt_top_height_map', 'gt_bot_height_map', 'gt_sin_map',
'gt_cos_map', 'gt_comp_attribs'] # dataloader will return list in this order
loader:
shuffle: True
drop_last: False
batch_size_per_card: 4
num_workers: 8
Eval:
dataset:
name: SimpleDataSet
data_dir: ./train_data/ctw1500/imgs/
label_file_list:
- ./train_data/ctw1500/imgs/test.txt
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
ignore_orientation: True
- DetLabelEncode: # Class handling label
- DetResizeForTest:
limit_type: 'min'
limit_side_len: 640
- NormalizeImage:
scale: 1./255.
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: 'hwc'
- Pad:
- ToCHWImage:
- KeepKeys:
keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
loader:
shuffle: False
drop_last: False
batch_size_per_card: 1 # must be 1
num_workers: 2
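A quick sanity check of the new configuration is to parse it and read back the DRRG-specific head settings. This is only a sketch: the config path is assumed (inferred from save_model_dir above) and PyYAML is assumed to be available; neither is part of this commit.

import yaml  # PyYAML, assumed available

# Hypothetical path, inferred from save_model_dir above.
with open("configs/det/det_r50_drrg_ctw.yml") as f:
    cfg = yaml.safe_load(f)

head = cfg["Architecture"]["Head"]
# These values override DRRGHead's 0.2 defaults (see det_drrg_head.py below).
print(head["name"], head["text_region_thr"], head["center_region_thr"])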
@@ -44,6 +44,7 @@ from .vqa import *
from .fce_aug import *
from .fce_targets import FCENetTargets
from .ct_process import *
from .drrg_targets import DRRGTargets
def transform(data, ops=None):
......
This diff has been collapsed.
from .roi_align_rotated.roi_align_rotated import RoIAlignRotated
// This code is adapted from:
// https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/csrc/pytorch/cpu/roi_align_rotated.cpp
#include <cassert>
#include <cmath>
#include <vector>
#include "paddle/extension.h"
#define PADDLE_WITH_CUDA
#define CHECK_INPUT_SAME(x1, x2) \
PD_CHECK(x1.place() == x2.place(), "inputs must be on the same place.")
#define CHECK_INPUT_CPU(x) PD_CHECK(x.is_cpu(), #x " must be a CPU Tensor.")
template <typename T> struct PreCalc {
int pos1;
int pos2;
int pos3;
int pos4;
T w1;
T w2;
T w3;
T w4;
};
template <typename T>
void pre_calc_for_bilinear_interpolate(
const int height, const int width, const int pooled_height,
const int pooled_width, const int iy_upper, const int ix_upper,
T roi_start_h, T roi_start_w, T bin_size_h, T bin_size_w,
int roi_bin_grid_h, int roi_bin_grid_w, T roi_center_h, T roi_center_w,
T cos_theta, T sin_theta, std::vector<PreCalc<T>> &pre_calc) {
int pre_calc_index = 0;
for (int ph = 0; ph < pooled_height; ph++) {
for (int pw = 0; pw < pooled_width; pw++) {
for (int iy = 0; iy < iy_upper; iy++) {
const T yy = roi_start_h + ph * bin_size_h +
static_cast<T>(iy + .5f) * bin_size_h /
static_cast<T>(roi_bin_grid_h); // e.g., 0.5, 1.5
for (int ix = 0; ix < ix_upper; ix++) {
const T xx = roi_start_w + pw * bin_size_w +
static_cast<T>(ix + .5f) * bin_size_w /
static_cast<T>(roi_bin_grid_w);
// Rotate by theta around the center and translate
// In image space, (y, x) is the order for Right Handed System,
// and this is essentially multiplying the point by a rotation matrix
// to rotate it counterclockwise through angle theta.
T y = yy * cos_theta - xx * sin_theta + roi_center_h;
T x = yy * sin_theta + xx * cos_theta + roi_center_w;
// deal with: inverse elements are out of feature map boundary
if (y < -1.0 || y > height || x < -1.0 || x > width) {
// empty
PreCalc<T> pc;
pc.pos1 = 0;
pc.pos2 = 0;
pc.pos3 = 0;
pc.pos4 = 0;
pc.w1 = 0;
pc.w2 = 0;
pc.w3 = 0;
pc.w4 = 0;
pre_calc[pre_calc_index] = pc;
pre_calc_index += 1;
continue;
}
if (y < 0) {
y = 0;
}
if (x < 0) {
x = 0;
}
int y_low = (int)y;
int x_low = (int)x;
int y_high;
int x_high;
if (y_low >= height - 1) {
y_high = y_low = height - 1;
y = (T)y_low;
} else {
y_high = y_low + 1;
}
if (x_low >= width - 1) {
x_high = x_low = width - 1;
x = (T)x_low;
} else {
x_high = x_low + 1;
}
T ly = y - y_low;
T lx = x - x_low;
T hy = 1. - ly, hx = 1. - lx;
T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
// save weights and indices
PreCalc<T> pc;
pc.pos1 = y_low * width + x_low;
pc.pos2 = y_low * width + x_high;
pc.pos3 = y_high * width + x_low;
pc.pos4 = y_high * width + x_high;
pc.w1 = w1;
pc.w2 = w2;
pc.w3 = w3;
pc.w4 = w4;
pre_calc[pre_calc_index] = pc;
pre_calc_index += 1;
}
}
}
}
}
template <typename T>
void roi_align_rotated_cpu_forward(const int nthreads, const T *input,
const T &spatial_scale, const bool aligned,
const bool clockwise, const int channels,
const int height, const int width,
const int pooled_height,
const int pooled_width,
const int sampling_ratio, const T *rois,
T *output) {
int n_rois = nthreads / channels / pooled_width / pooled_height;
// (n, c, ph, pw) is an element in the pooled output
// can be parallelized using omp
// #pragma omp parallel for num_threads(32)
for (int n = 0; n < n_rois; n++) {
int index_n = n * channels * pooled_width * pooled_height;
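// Each RoI row holds 6 values: (batch_index, center_x, center_y, width,
// height, theta), with theta given in radians.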
const T *current_roi = rois + n * 6;
int roi_batch_ind = current_roi[0];
// Do not use rounding; this implementation detail is critical
T offset = aligned ? (T)0.5 : (T)0.0;
T roi_center_w = current_roi[1] * spatial_scale - offset;
T roi_center_h = current_roi[2] * spatial_scale - offset;
T roi_width = current_roi[3] * spatial_scale;
T roi_height = current_roi[4] * spatial_scale;
T theta = current_roi[5];
if (clockwise) {
theta = -theta; // If clockwise, the angle needs to be reversed.
}
T cos_theta = cos(theta);
T sin_theta = sin(theta);
if (aligned) {
assert(roi_width >= 0 && roi_height >= 0);
} else { // for backward-compatibility only
roi_width = std::max(roi_width, (T)1.);
roi_height = std::max(roi_height, (T)1.);
}
T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
// We use roi_bin_grid to sample the grid and mimic integral
int roi_bin_grid_h = (sampling_ratio > 0)
? sampling_ratio
: ceilf(roi_height / pooled_height); // e.g., = 2
int roi_bin_grid_w =
(sampling_ratio > 0) ? sampling_ratio : ceilf(roi_width / pooled_width);
// We do average (integral) pooling inside a bin
const T count = std::max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. = 4
// we want to precalculate indices and weights shared by all channels,
// this is the key point of optimization
std::vector<PreCalc<T>> pre_calc(roi_bin_grid_h * roi_bin_grid_w *
pooled_width * pooled_height);
// roi_start_h and roi_start_w are computed wrt the center of RoI (x, y).
// Appropriate translation needs to be applied after.
T roi_start_h = -roi_height / 2.0;
T roi_start_w = -roi_width / 2.0;
pre_calc_for_bilinear_interpolate(
height, width, pooled_height, pooled_width, roi_bin_grid_h,
roi_bin_grid_w, roi_start_h, roi_start_w, bin_size_h, bin_size_w,
roi_bin_grid_h, roi_bin_grid_w, roi_center_h, roi_center_w, cos_theta,
sin_theta, pre_calc);
for (int c = 0; c < channels; c++) {
int index_n_c = index_n + c * pooled_width * pooled_height;
const T *offset_input =
input + (roi_batch_ind * channels + c) * height * width;
int pre_calc_index = 0;
for (int ph = 0; ph < pooled_height; ph++) {
for (int pw = 0; pw < pooled_width; pw++) {
int index = index_n_c + ph * pooled_width + pw;
T output_val = 0.;
for (int iy = 0; iy < roi_bin_grid_h; iy++) {
for (int ix = 0; ix < roi_bin_grid_w; ix++) {
PreCalc<T> pc = pre_calc[pre_calc_index];
output_val += pc.w1 * offset_input[pc.pos1] +
pc.w2 * offset_input[pc.pos2] +
pc.w3 * offset_input[pc.pos3] +
pc.w4 * offset_input[pc.pos4];
pre_calc_index += 1;
}
}
output_val /= count;
output[index] = output_val;
} // for pw
} // for ph
} // for c
} // for n
}
template <typename T>
void bilinear_interpolate_gradient(const int height, const int width, T y, T x,
T &w1, T &w2, T &w3, T &w4, int &x_low,
int &x_high, int &y_low, int &y_high) {
// deal with cases that inverse elements are out of feature map boundary
if (y < -1.0 || y > height || x < -1.0 || x > width) {
// empty
w1 = w2 = w3 = w4 = 0.;
x_low = x_high = y_low = y_high = -1;
return;
}
if (y < 0) {
y = 0;
}
if (x < 0) {
x = 0;
}
y_low = (int)y;
x_low = (int)x;
if (y_low >= height - 1) {
y_high = y_low = height - 1;
y = (T)y_low;
} else {
y_high = y_low + 1;
}
if (x_low >= width - 1) {
x_high = x_low = width - 1;
x = (T)x_low;
} else {
x_high = x_low + 1;
}
T ly = y - y_low;
T lx = x - x_low;
T hy = 1. - ly, hx = 1. - lx;
// reference in forward
// T v1 = input[y_low * width + x_low];
// T v2 = input[y_low * width + x_high];
// T v3 = input[y_high * width + x_low];
// T v4 = input[y_high * width + x_high];
// T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
return;
}
template <class T> inline void add(T *address, const T &val) {
*address += val;
}
template <typename T>
void roi_align_rotated_cpu_backward(
const int nthreads,
// may not be contiguous. should index using n_stride, etc
const T *grad_output, const T &spatial_scale, const bool aligned,
const bool clockwise, const int channels, const int height, const int width,
const int pooled_height, const int pooled_width, const int sampling_ratio,
T *grad_input, const T *rois, const int n_stride, const int c_stride,
const int h_stride, const int w_stride) {
for (int index = 0; index < nthreads; index++) {
// (n, c, ph, pw) is an element in the pooled output
int pw = index % pooled_width;
int ph = (index / pooled_width) % pooled_height;
int c = (index / pooled_width / pooled_height) % channels;
int n = index / pooled_width / pooled_height / channels;
const T *current_roi = rois + n * 6;
int roi_batch_ind = current_roi[0];
// Do not use rounding; this implementation detail is critical
T offset = aligned ? (T)0.5 : (T)0.0;
T roi_center_w = current_roi[1] * spatial_scale - offset;
T roi_center_h = current_roi[2] * spatial_scale - offset;
T roi_width = current_roi[3] * spatial_scale;
T roi_height = current_roi[4] * spatial_scale;
T theta = current_roi[5];
if (clockwise) {
theta = -theta; // If clockwise, the angle needs to be reversed.
}
T cos_theta = cos(theta);
T sin_theta = sin(theta);
if (aligned) {
assert(roi_width >= 0 && roi_height >= 0);
} else { // for backward-compatibility only
roi_width = std::max(roi_width, (T)1.);
roi_height = std::max(roi_height, (T)1.);
}
T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
T *offset_grad_input =
grad_input + ((roi_batch_ind * channels + c) * height * width);
int output_offset = n * n_stride + c * c_stride;
const T *offset_grad_output = grad_output + output_offset;
const T grad_output_this_bin =
offset_grad_output[ph * h_stride + pw * w_stride];
// We use roi_bin_grid to sample the grid and mimic integral
int roi_bin_grid_h = (sampling_ratio > 0)
? sampling_ratio
: ceilf(roi_height / pooled_height); // e.g., = 2
int roi_bin_grid_w =
(sampling_ratio > 0) ? sampling_ratio : ceilf(roi_width / pooled_width);
// roi_start_h and roi_start_w are computed wrt the center of RoI (x, y).
// Appropriate translation needs to be applied after.
T roi_start_h = -roi_height / 2.0;
T roi_start_w = -roi_width / 2.0;
// We do average (integral) pooling inside a bin
const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4
for (int iy = 0; iy < roi_bin_grid_h; iy++) {
const T yy = roi_start_h + ph * bin_size_h +
static_cast<T>(iy + .5f) * bin_size_h /
static_cast<T>(roi_bin_grid_h); // e.g., 0.5, 1.5
for (int ix = 0; ix < roi_bin_grid_w; ix++) {
const T xx = roi_start_w + pw * bin_size_w +
static_cast<T>(ix + .5f) * bin_size_w /
static_cast<T>(roi_bin_grid_w);
// Rotate by theta around the center and translate
T y = yy * cos_theta - xx * sin_theta + roi_center_h;
T x = yy * sin_theta + xx * cos_theta + roi_center_w;
T w1, w2, w3, w4;
int x_low, x_high, y_low, y_high;
bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4,
x_low, x_high, y_low, y_high);
T g1 = grad_output_this_bin * w1 / count;
T g2 = grad_output_this_bin * w2 / count;
T g3 = grad_output_this_bin * w3 / count;
T g4 = grad_output_this_bin * w4 / count;
if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) {
// atomic add is not needed for now since it is single threaded
add(offset_grad_input + y_low * width + x_low, static_cast<T>(g1));
add(offset_grad_input + y_low * width + x_high, static_cast<T>(g2));
add(offset_grad_input + y_high * width + x_low, static_cast<T>(g3));
add(offset_grad_input + y_high * width + x_high, static_cast<T>(g4));
} // if
} // ix
} // iy
} // for
} // ROIAlignRotatedBackward
std::vector<paddle::Tensor>
RoIAlignRotatedCPUForward(const paddle::Tensor &input,
const paddle::Tensor &rois, int aligned_height,
int aligned_width, float spatial_scale,
int sampling_ratio, bool aligned, bool clockwise) {
CHECK_INPUT_CPU(input);
CHECK_INPUT_CPU(rois);
auto num_rois = rois.shape()[0];
auto channels = input.shape()[1];
auto height = input.shape()[2];
auto width = input.shape()[3];
auto output =
paddle::empty({num_rois, channels, aligned_height, aligned_width},
input.type(), paddle::CPUPlace());
auto output_size = output.numel();
PD_DISPATCH_FLOATING_TYPES(
input.type(), "roi_align_rotated_cpu_forward", ([&] {
roi_align_rotated_cpu_forward<data_t>(
output_size, input.data<data_t>(),
static_cast<data_t>(spatial_scale), aligned, clockwise, channels,
height, width, aligned_height, aligned_width, sampling_ratio,
rois.data<data_t>(), output.data<data_t>());
}));
return {output};
}
std::vector<paddle::Tensor> RoIAlignRotatedCPUBackward(
const paddle::Tensor &input, const paddle::Tensor &rois,
const paddle::Tensor &grad_output, int aligned_height, int aligned_width,
float spatial_scale, int sampling_ratio, bool aligned, bool clockwise) {
auto batch_size = input.shape()[0];
auto channels = input.shape()[1];
auto height = input.shape()[2];
auto width = input.shape()[3];
auto grad_input = paddle::full({batch_size, channels, height, width}, 0.0,
input.type(), paddle::CPUPlace());
// get stride values to ensure indexing into gradients is correct.
int n_stride = grad_output.shape()[0];
int c_stride = grad_output.shape()[1];
int h_stride = grad_output.shape()[2];
int w_stride = grad_output.shape()[3];
PD_DISPATCH_FLOATING_TYPES(
grad_output.type(), "roi_align_rotated_cpu_backward", [&] {
roi_align_rotated_cpu_backward<data_t>(
grad_output.numel(), grad_output.data<data_t>(),
static_cast<data_t>(spatial_scale), aligned, clockwise, channels,
height, width, aligned_height, aligned_width, sampling_ratio,
grad_input.data<data_t>(), rois.data<data_t>(), n_stride, c_stride,
h_stride, w_stride);
});
return {grad_input};
}
#ifdef PADDLE_WITH_CUDA
std::vector<paddle::Tensor>
RoIAlignRotatedCUDAForward(const paddle::Tensor &input,
const paddle::Tensor &rois, int aligned_height,
int aligned_width, float spatial_scale,
int sampling_ratio, bool aligned, bool clockwise);
#endif
#ifdef PADDLE_WITH_CUDA
std::vector<paddle::Tensor> RoIAlignRotatedCUDABackward(
const paddle::Tensor &input, const paddle::Tensor &rois,
const paddle::Tensor &grad_output, int aligned_height, int aligned_width,
float spatial_scale, int sampling_ratio, bool aligned, bool clockwise);
#endif
std::vector<paddle::Tensor>
RoIAlignRotatedForward(const paddle::Tensor &input, const paddle::Tensor &rois,
int aligned_height, int aligned_width,
float spatial_scale, int sampling_ratio, bool aligned,
bool clockwise) {
CHECK_INPUT_SAME(input, rois);
if (input.is_cpu()) {
return RoIAlignRotatedCPUForward(input, rois, aligned_height, aligned_width,
spatial_scale, sampling_ratio, aligned,
clockwise);
#ifdef PADDLE_WITH_CUDA
} else if (input.is_gpu()) {
return RoIAlignRotatedCUDAForward(input, rois, aligned_height,
aligned_width, spatial_scale,
sampling_ratio, aligned, clockwise);
#endif
} else {
PD_THROW("Unsupported device type for forward function of roi align "
"rotated operator.");
}
}
std::vector<paddle::Tensor>
RoIAlignRotatedBackward(const paddle::Tensor &input, const paddle::Tensor &rois,
const paddle::Tensor &grad_output, int aligned_height,
int aligned_width, float spatial_scale,
int sampling_ratio, bool aligned, bool clockwise) {
CHECK_INPUT_SAME(input, rois);
if (input.is_cpu()) {
return RoIAlignRotatedCPUBackward(input, rois, grad_output, aligned_height,
aligned_width, spatial_scale,
sampling_ratio, aligned, clockwise);
#ifdef PADDLE_WITH_CUDA
} else if (input.is_gpu()) {
return RoIAlignRotatedCUDABackward(input, rois, grad_output, aligned_height,
aligned_width, spatial_scale,
sampling_ratio, aligned, clockwise);
#endif
} else {
PD_THROW("Unsupported device type for forward function of roi align "
"rotated operator.");
}
}
std::vector<std::vector<int64_t>> InferShape(std::vector<int64_t> input_shape,
std::vector<int64_t> rois_shape) {
return {{rois_shape[0], input_shape[1], input_shape[2], input_shape[3]}};
}
std::vector<std::vector<int64_t>>
InferBackShape(std::vector<int64_t> input_shape,
std::vector<int64_t> rois_shape) {
return {input_shape};
}
std::vector<paddle::DataType> InferDtype(paddle::DataType input_dtype,
paddle::DataType rois_dtype) {
return {input_dtype};
}
PD_BUILD_OP(roi_align_rotated)
.Inputs({"Input", "Rois"})
.Outputs({"Output"})
.Attrs({"aligned_height: int", "aligned_width: int", "spatial_scale: float",
"sampling_ratio: int", "aligned: bool", "clockwise: bool"})
.SetKernelFn(PD_KERNEL(RoIAlignRotatedForward))
.SetInferShapeFn(PD_INFER_SHAPE(InferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(InferDtype));
PD_BUILD_GRAD_OP(roi_align_rotated)
.Inputs({"Input", "Rois", paddle::Grad("Output")})
.Attrs({"aligned_height: int", "aligned_width: int", "spatial_scale: float",
"sampling_ratio: int", "aligned: bool", "clockwise: bool"})
.Outputs({paddle::Grad("Input")})
.SetKernelFn(PD_KERNEL(RoIAlignRotatedBackward))
.SetInferShapeFn(PD_INFER_SHAPE(InferBackShape));
// This code is adapted from:
// https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/csrc/common/cuda/roi_align_rotated_cuda_kernel.cuh
#include <cassert>
#include <cmath>
#include <vector>
#include "paddle/extension.h"
#include <cuda.h>
#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
i += blockDim.x * gridDim.x)
#define THREADS_PER_BLOCK 512
inline int GET_BLOCKS(const int N) {
int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
int max_block_num = 4096;
return min(optimal_block_num, max_block_num);
}
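// Software fallback for double-precision atomicAdd: hardware support only
// exists on GPUs with compute capability >= 6.0 (Pascal and newer).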
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
static __inline__ __device__ double atomicAdd(double *address, double val) {
unsigned long long int *address_as_ull = (unsigned long long int *)address;
unsigned long long int old = *address_as_ull, assumed;
if (val == 0.0)
return __longlong_as_double(old);
do {
assumed = old;
old = atomicCAS(address_as_ull, assumed,
__double_as_longlong(val + __longlong_as_double(assumed)));
} while (assumed != old);
return __longlong_as_double(old);
}
#endif
template <typename T>
__device__ T bilinear_interpolate(const T *input, const int height,
const int width, T y, T x,
const int index /* index for debug only*/) {
// deal with cases that inverse elements are out of feature map boundary
if (y < -1.0 || y > height || x < -1.0 || x > width)
return 0;
if (y <= 0)
y = 0;
if (x <= 0)
x = 0;
int y_low = (int)y;
int x_low = (int)x;
int y_high;
int x_high;
if (y_low >= height - 1) {
y_high = y_low = height - 1;
y = (T)y_low;
} else {
y_high = y_low + 1;
}
if (x_low >= width - 1) {
x_high = x_low = width - 1;
x = (T)x_low;
} else {
x_high = x_low + 1;
}
T ly = y - y_low;
T lx = x - x_low;
T hy = 1. - ly, hx = 1. - lx;
// do bilinear interpolation
T v1 = input[y_low * width + x_low];
T v2 = input[y_low * width + x_high];
T v3 = input[y_high * width + x_low];
T v4 = input[y_high * width + x_high];
T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
return val;
}
template <typename T>
__device__ void
bilinear_interpolate_gradient(const int height, const int width, T y, T x,
T &w1, T &w2, T &w3, T &w4, int &x_low,
int &x_high, int &y_low, int &y_high,
const int index /* index for debug only*/) {
// deal with cases that inverse elements are out of feature map boundary
if (y < -1.0 || y > height || x < -1.0 || x > width) {
// empty
w1 = w2 = w3 = w4 = 0.;
x_low = x_high = y_low = y_high = -1;
return;
}
if (y <= 0)
y = 0;
if (x <= 0)
x = 0;
y_low = (int)y;
x_low = (int)x;
if (y_low >= height - 1) {
y_high = y_low = height - 1;
y = (T)y_low;
} else {
y_high = y_low + 1;
}
if (x_low >= width - 1) {
x_high = x_low = width - 1;
x = (T)x_low;
} else {
x_high = x_low + 1;
}
T ly = y - y_low;
T lx = x - x_low;
T hy = 1. - ly, hx = 1. - lx;
// reference in forward
// T v1 = input[y_low * width + x_low];
// T v2 = input[y_low * width + x_high];
// T v3 = input[y_high * width + x_low];
// T v4 = input[y_high * width + x_high];
// T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
return;
}
/*** Forward ***/
template <typename scalar_t>
__global__ void roi_align_rotated_cuda_forward_kernel(
const int nthreads, const scalar_t *bottom_data,
const scalar_t *bottom_rois, const scalar_t spatial_scale,
const int sample_num, const bool aligned, const bool clockwise,
const int channels, const int height, const int width,
const int pooled_height, const int pooled_width, scalar_t *top_data) {
CUDA_1D_KERNEL_LOOP(index, nthreads) {
// (n, c, ph, pw) is an element in the pooled output
int pw = index % pooled_width;
int ph = (index / pooled_width) % pooled_height;
int c = (index / pooled_width / pooled_height) % channels;
int n = index / pooled_width / pooled_height / channels;
const scalar_t *offset_bottom_rois = bottom_rois + n * 6;
int roi_batch_ind = offset_bottom_rois[0];
// Do not use rounding; this implementation detail is critical
scalar_t offset = aligned ? (scalar_t)0.5 : (scalar_t)0.0;
scalar_t roi_center_w = offset_bottom_rois[1] * spatial_scale - offset;
scalar_t roi_center_h = offset_bottom_rois[2] * spatial_scale - offset;
scalar_t roi_width = offset_bottom_rois[3] * spatial_scale;
scalar_t roi_height = offset_bottom_rois[4] * spatial_scale;
// scalar_t theta = offset_bottom_rois[5] * M_PI / 180.0;
scalar_t theta = offset_bottom_rois[5];
if (clockwise) {
theta = -theta; // If clockwise, the angle needs to be reversed.
}
if (!aligned) { // for backward-compatibility only
// Force malformed ROIs to be 1x1
roi_width = max(roi_width, (scalar_t)1.);
roi_height = max(roi_height, (scalar_t)1.);
}
scalar_t bin_size_h = static_cast<scalar_t>(roi_height) /
static_cast<scalar_t>(pooled_height);
scalar_t bin_size_w =
static_cast<scalar_t>(roi_width) / static_cast<scalar_t>(pooled_width);
const scalar_t *offset_bottom_data =
bottom_data + (roi_batch_ind * channels + c) * height * width;
// We use roi_bin_grid to sample the grid and mimic integral
int roi_bin_grid_h = (sample_num > 0)
? sample_num
: ceilf(roi_height / pooled_height); // e.g., = 2
int roi_bin_grid_w =
(sample_num > 0) ? sample_num : ceilf(roi_width / pooled_width);
// roi_start_h and roi_start_w are computed wrt the center of RoI (x, y).
// Appropriate translation needs to be applied after.
scalar_t roi_start_h = -roi_height / 2.0;
scalar_t roi_start_w = -roi_width / 2.0;
scalar_t cosscalar_theta = cos(theta);
scalar_t sinscalar_theta = sin(theta);
// We do average (integral) pooling inside a bin
const scalar_t count = max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. = 4
scalar_t output_val = 0.;
for (int iy = 0; iy < roi_bin_grid_h; iy++) { // e.g., iy = 0, 1
const scalar_t yy =
roi_start_h + ph * bin_size_h +
static_cast<scalar_t>(iy + .5f) * bin_size_h /
static_cast<scalar_t>(roi_bin_grid_h); // e.g., 0.5, 1.5
for (int ix = 0; ix < roi_bin_grid_w; ix++) {
const scalar_t xx = roi_start_w + pw * bin_size_w +
static_cast<scalar_t>(ix + .5f) * bin_size_w /
static_cast<scalar_t>(roi_bin_grid_w);
// Rotate by theta (counterclockwise) around the center and translate
scalar_t y = yy * cosscalar_theta - xx * sinscalar_theta + roi_center_h;
scalar_t x = yy * sinscalar_theta + xx * cosscalar_theta + roi_center_w;
scalar_t val = bilinear_interpolate<scalar_t>(
offset_bottom_data, height, width, y, x, index);
output_val += val;
}
}
output_val /= count;
top_data[index] = output_val;
}
}
/*** Backward ***/
template <typename scalar_t>
__global__ void roi_align_rotated_backward_cuda_kernel(
const int nthreads, const scalar_t *top_diff, const scalar_t *bottom_rois,
const scalar_t spatial_scale, const int sample_num, const bool aligned,
const bool clockwise, const int channels, const int height, const int width,
const int pooled_height, const int pooled_width, scalar_t *bottom_diff) {
CUDA_1D_KERNEL_LOOP(index, nthreads) {
// (n, c, ph, pw) is an element in the pooled output
int pw = index % pooled_width;
int ph = (index / pooled_width) % pooled_height;
int c = (index / pooled_width / pooled_height) % channels;
int n = index / pooled_width / pooled_height / channels;
const scalar_t *offset_bottom_rois = bottom_rois + n * 6;
int roi_batch_ind = offset_bottom_rois[0];
// Do not round
scalar_t offset = aligned ? (scalar_t)0.5 : (scalar_t)0.0;
scalar_t roi_center_w = offset_bottom_rois[1] * spatial_scale - offset;
scalar_t roi_center_h = offset_bottom_rois[2] * spatial_scale - offset;
scalar_t roi_width = offset_bottom_rois[3] * spatial_scale;
scalar_t roi_height = offset_bottom_rois[4] * spatial_scale;
// scalar_t theta = offset_bottom_rois[5] * M_PI / 180.0;
scalar_t theta = offset_bottom_rois[5];
if (clockwise) {
theta = -theta; // If clockwise, the angle needs to be reversed.
}
if (!aligned) { // for backward-compatibility only
// Force malformed ROIs to be 1x1
roi_width = max(roi_width, (scalar_t)1.);
roi_height = max(roi_height, (scalar_t)1.);
}
scalar_t bin_size_h = static_cast<scalar_t>(roi_height) /
static_cast<scalar_t>(pooled_height);
scalar_t bin_size_w =
static_cast<scalar_t>(roi_width) / static_cast<scalar_t>(pooled_width);
scalar_t *offset_bottom_diff =
bottom_diff + (roi_batch_ind * channels + c) * height * width;
int top_offset = (n * channels + c) * pooled_height * pooled_width;
const scalar_t *offset_top_diff = top_diff + top_offset;
const scalar_t top_diff_this_bin = offset_top_diff[ph * pooled_width + pw];
// We use roi_bin_grid to sample the grid and mimic integral
int roi_bin_grid_h = (sample_num > 0)
? sample_num
: ceilf(roi_height / pooled_height); // e.g., = 2
int roi_bin_grid_w =
(sample_num > 0) ? sample_num : ceilf(roi_width / pooled_width);
// roi_start_h and roi_start_w are computed wrt the center of RoI (x, y).
// Appropriate translation needs to be applied after.
scalar_t roi_start_h = -roi_height / 2.0;
scalar_t roi_start_w = -roi_width / 2.0;
scalar_t cosTheta = cos(theta);
scalar_t sinTheta = sin(theta);
// We do average (integral) pooling inside a bin
const scalar_t count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4
for (int iy = 0; iy < roi_bin_grid_h; iy++) { // e.g., iy = 0, 1
const scalar_t yy =
roi_start_h + ph * bin_size_h +
static_cast<scalar_t>(iy + .5f) * bin_size_h /
static_cast<scalar_t>(roi_bin_grid_h); // e.g., 0.5, 1.5
for (int ix = 0; ix < roi_bin_grid_w; ix++) {
const scalar_t xx = roi_start_w + pw * bin_size_w +
static_cast<scalar_t>(ix + .5f) * bin_size_w /
static_cast<scalar_t>(roi_bin_grid_w);
// Rotate by theta around the center and translate
scalar_t y = yy * cosTheta - xx * sinTheta + roi_center_h;
scalar_t x = yy * sinTheta + xx * cosTheta + roi_center_w;
scalar_t w1, w2, w3, w4;
int x_low, x_high, y_low, y_high;
bilinear_interpolate_gradient<scalar_t>(height, width, y, x, w1, w2, w3,
w4, x_low, x_high, y_low,
y_high, index);
scalar_t g1 = top_diff_this_bin * w1 / count;
scalar_t g2 = top_diff_this_bin * w2 / count;
scalar_t g3 = top_diff_this_bin * w3 / count;
scalar_t g4 = top_diff_this_bin * w4 / count;
if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) {
atomicAdd(offset_bottom_diff + y_low * width + x_low, g1);
atomicAdd(offset_bottom_diff + y_low * width + x_high, g2);
atomicAdd(offset_bottom_diff + y_high * width + x_low, g3);
atomicAdd(offset_bottom_diff + y_high * width + x_high, g4);
} // if
} // ix
} // iy
} // CUDA_1D_KERNEL_LOOP
} // RoIAlignBackward
std::vector<paddle::Tensor>
RoIAlignRotatedCUDAForward(const paddle::Tensor &input,
const paddle::Tensor &rois, int aligned_height,
int aligned_width, float spatial_scale,
int sampling_ratio, bool aligned, bool clockwise) {
auto num_rois = rois.shape()[0];
auto channels = input.shape()[1];
auto height = input.shape()[2];
auto width = input.shape()[3];
auto output =
paddle::empty({num_rois, channels, aligned_height, aligned_width},
input.type(), paddle::GPUPlace());
auto output_size = output.numel();
PD_DISPATCH_FLOATING_TYPES(
input.type(), "roi_align_rotated_cuda_forward_kernel", ([&] {
roi_align_rotated_cuda_forward_kernel<
data_t><<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
output_size, input.data<data_t>(), rois.data<data_t>(),
static_cast<data_t>(spatial_scale), sampling_ratio, aligned,
clockwise, channels, height, width, aligned_height, aligned_width,
output.data<data_t>());
}));
return {output};
}
std::vector<paddle::Tensor> RoIAlignRotatedCUDABackward(
const paddle::Tensor &input, const paddle::Tensor &rois,
const paddle::Tensor &grad_output, int aligned_height, int aligned_width,
float spatial_scale, int sampling_ratio, bool aligned, bool clockwise) {
auto num_rois = rois.shape()[0];
auto batch_size = input.shape()[0];
auto channels = input.shape()[1];
auto height = input.shape()[2];
auto width = input.shape()[3];
auto grad_input = paddle::full({batch_size, channels, height, width}, 0.0,
input.type(), paddle::GPUPlace());
const int output_size = num_rois * aligned_height * aligned_width * channels;
PD_DISPATCH_FLOATING_TYPES(
grad_output.type(), "roi_align_rotated_backward_cuda_kernel", ([&] {
roi_align_rotated_backward_cuda_kernel<
data_t><<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
output_size, grad_output.data<data_t>(), rois.data<data_t>(),
spatial_scale, sampling_ratio, aligned, clockwise, channels, height,
width, aligned_height, aligned_width, grad_input.data<data_t>());
}));
return {grad_input};
}
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/roi_align_rotated.py
"""
import paddle
import paddle.nn as nn
from paddle.utils.cpp_extension import load
custom_ops = load(
name="custom_jit_ops",
sources=[
"ppocr/ext_op/roi_align_rotated/roi_align_rotated.cc",
"ppocr/ext_op/roi_align_rotated/roi_align_rotated.cu"
])
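# Note: paddle.utils.cpp_extension.load JIT-compiles the sources above the
# first time this module is imported (the build is cached afterwards);
# compiling the .cu file requires a CUDA toolchain.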
roi_align_rotated = custom_ops.roi_align_rotated
class RoIAlignRotated(nn.Layer):
"""RoI align pooling layer for rotated proposals.
"""
def __init__(self,
out_size,
spatial_scale,
sample_num=0,
aligned=True,
clockwise=False):
super(RoIAlignRotated, self).__init__()
if isinstance(out_size, int):
self.out_h = out_size
self.out_w = out_size
elif isinstance(out_size, tuple):
assert len(out_size) == 2
assert isinstance(out_size[0], int)
assert isinstance(out_size[1], int)
self.out_h, self.out_w = out_size
else:
raise TypeError(
'"out_size" must be an integer or tuple of integers')
self.spatial_scale = float(spatial_scale)
self.sample_num = int(sample_num)
self.aligned = aligned
self.clockwise = clockwise
def forward(self, feats, rois):
output = roi_align_rotated(feats, rois, self.out_h, self.out_w,
self.spatial_scale, self.sample_num,
self.aligned, self.clockwise)
return output
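A minimal usage sketch of the layer above, assuming the custom op compiles on the local machine (Paddle plus a C++/CUDA toolchain). Each RoI row carries six values, matching the rois + n * 6 indexing in the kernels: (batch_index, center_x, center_y, width, height, theta in radians). The tensor sizes are illustrative only.

import paddle
from ppocr.ext_op import RoIAlignRotated

feats = paddle.rand([1, 32, 64, 64])                     # (N, C, H, W) feature map
rois = paddle.to_tensor([[0., 20., 20., 12., 6., 0.3]])  # one rotated RoI
pooler = RoIAlignRotated(out_size=(3, 4), spatial_scale=1.0)
print(pooler(feats, rois).shape)                         # -> [1, 32, 3, 4]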
@@ -26,6 +26,7 @@ from .det_sast_loss import SASTLoss
from .det_pse_loss import PSELoss
from .det_fce_loss import FCELoss
from .det_ct_loss import CTLoss
from .det_drrg_loss import DRRGLoss
# rec loss
from .rec_ctc_loss import CTCLoss
@@ -69,7 +70,7 @@ def build_loss(config):
'CELoss', 'TableAttentionLoss', 'SARLoss', 'AsterLoss', 'SDMGRLoss',
'VQASerTokenLayoutLMLoss', 'LossFromOutput', 'PRENLoss', 'MultiLoss',
'TableMasterLoss', 'SPINAttentionLoss', 'VLLoss', 'StrokeFocusLoss',
'SLALoss', 'CTLoss', 'DRRGLoss'
]
config = copy.deepcopy(config)
module_name = config.pop('name')
......
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textdet/losses/drrg_loss.py
"""
import paddle
import paddle.nn.functional as F
from paddle import nn
class DRRGLoss(nn.Layer):
def __init__(self, ohem_ratio=3.0):
super().__init__()
self.ohem_ratio = ohem_ratio
self.downsample_ratio = 1.0
def balance_bce_loss(self, pred, gt, mask):
"""Balanced Binary-CrossEntropy Loss.
Args:
pred (Tensor): Shape of :math:`(1, H, W)`.
gt (Tensor): Shape of :math:`(1, H, W)`.
mask (Tensor): Shape of :math:`(1, H, W)`.
Returns:
Tensor: Balanced bce loss.
"""
assert pred.shape == gt.shape == mask.shape
assert paddle.all(pred >= 0) and paddle.all(pred <= 1)
assert paddle.all(gt >= 0) and paddle.all(gt <= 1)
positive = gt * mask
negative = (1 - gt) * mask
positive_count = int(positive.sum())
if positive_count > 0:
loss = F.binary_cross_entropy(pred, gt, reduction='none')
positive_loss = paddle.sum(loss * positive)
negative_loss = loss * negative
negative_count = min(
int(negative.sum()), int(positive_count * self.ohem_ratio))
else:
positive_loss = paddle.to_tensor(0.0)
loss = F.binary_cross_entropy(pred, gt, reduction='none')
negative_loss = loss * negative
negative_count = 100
negative_loss, _ = paddle.topk(
negative_loss.reshape([-1]), negative_count)
balance_loss = (positive_loss + paddle.sum(negative_loss)) / (
float(positive_count + negative_count) + 1e-5)
return balance_loss
def gcn_loss(self, gcn_data):
"""CrossEntropy Loss from gcn module.
Args:
gcn_data (tuple(Tensor, Tensor)): The first is the
prediction with shape :math:`(N, 2)` and the
second is the gt label with shape :math:`(m, n)`
where :math:`m * n = N`.
Returns:
Tensor: CrossEntropy loss.
"""
gcn_pred, gt_labels = gcn_data
gt_labels = gt_labels.reshape([-1])
loss = F.cross_entropy(gcn_pred, gt_labels)
return loss
def bitmasks2tensor(self, bitmasks, target_sz):
"""Convert Bitmasks to tensor.
Args:
bitmasks (list[BitmapMasks]): The BitmapMasks list. Each item is
for one img.
target_sz (tuple(int, int)): The target tensor of size
:math:`(H, W)`.
Returns:
list[Tensor]: The list of kernel tensors. Each element stands for
one kernel level.
"""
batch_size = len(bitmasks)
results = []
kernel = []
for batch_inx in range(batch_size):
mask = bitmasks[batch_inx]
# hxw
mask_sz = mask.shape
# left, right, top, bottom
pad = [0, target_sz[1] - mask_sz[1], 0, target_sz[0] - mask_sz[0]]
mask = F.pad(mask, pad, mode='constant', value=0)
kernel.append(mask)
kernel = paddle.stack(kernel)
results.append(kernel)
return results
def forward(self, preds, labels):
"""Compute Drrg loss.
"""
assert isinstance(preds, tuple)
gt_text_mask, gt_center_region_mask, gt_mask, gt_top_height_map, gt_bot_height_map, gt_sin_map, gt_cos_map = labels[
1:8]
downsample_ratio = self.downsample_ratio
pred_maps, gcn_data = preds
pred_text_region = pred_maps[:, 0, :, :]
pred_center_region = pred_maps[:, 1, :, :]
pred_sin_map = pred_maps[:, 2, :, :]
pred_cos_map = pred_maps[:, 3, :, :]
pred_top_height_map = pred_maps[:, 4, :, :]
pred_bot_height_map = pred_maps[:, 5, :, :]
feature_sz = pred_maps.shape
# bitmask 2 tensor
mapping = {
'gt_text_mask': paddle.cast(gt_text_mask, 'float32'),
'gt_center_region_mask':
paddle.cast(gt_center_region_mask, 'float32'),
'gt_mask': paddle.cast(gt_mask, 'float32'),
'gt_top_height_map': paddle.cast(gt_top_height_map, 'float32'),
'gt_bot_height_map': paddle.cast(gt_bot_height_map, 'float32'),
'gt_sin_map': paddle.cast(gt_sin_map, 'float32'),
'gt_cos_map': paddle.cast(gt_cos_map, 'float32')
}
gt = {}
for key, value in mapping.items():
gt[key] = value
if abs(downsample_ratio - 1.0) < 1e-2:
gt[key] = self.bitmasks2tensor(gt[key], feature_sz[2:])
else:
gt[key] = [item.rescale(downsample_ratio) for item in gt[key]]
gt[key] = self.bitmasks2tensor(gt[key], feature_sz[2:])
if key in ['gt_top_height_map', 'gt_bot_height_map']:
gt[key] = [item * downsample_ratio for item in gt[key]]
gt[key] = [item for item in gt[key]]
scale = paddle.sqrt(1.0 / (pred_sin_map**2 + pred_cos_map**2 + 1e-8))
pred_sin_map = pred_sin_map * scale
pred_cos_map = pred_cos_map * scale
loss_text = self.balance_bce_loss(
F.sigmoid(pred_text_region), gt['gt_text_mask'][0],
gt['gt_mask'][0])
text_mask = (gt['gt_text_mask'][0] * gt['gt_mask'][0])
negative_text_mask = ((1 - gt['gt_text_mask'][0]) * gt['gt_mask'][0])
loss_center_map = F.binary_cross_entropy(
F.sigmoid(pred_center_region),
gt['gt_center_region_mask'][0],
reduction='none')
if int(text_mask.sum()) > 0:
loss_center_positive = paddle.sum(loss_center_map *
text_mask) / paddle.sum(text_mask)
else:
loss_center_positive = paddle.to_tensor(0.0)
loss_center_negative = paddle.sum(
loss_center_map *
negative_text_mask) / paddle.sum(negative_text_mask)
loss_center = loss_center_positive + 0.5 * loss_center_negative
center_mask = (gt['gt_center_region_mask'][0] * gt['gt_mask'][0])
if int(center_mask.sum()) > 0:
map_sz = pred_top_height_map.shape
ones = paddle.ones(map_sz, dtype='float32')
loss_top = F.smooth_l1_loss(
pred_top_height_map / (gt['gt_top_height_map'][0] + 1e-2),
ones,
reduction='none')
loss_bot = F.smooth_l1_loss(
pred_bot_height_map / (gt['gt_bot_height_map'][0] + 1e-2),
ones,
reduction='none')
gt_height = (
gt['gt_top_height_map'][0] + gt['gt_bot_height_map'][0])
loss_height = paddle.sum(
(paddle.log(gt_height + 1) *
(loss_top + loss_bot)) * center_mask) / paddle.sum(center_mask)
loss_sin = paddle.sum(
F.smooth_l1_loss(
pred_sin_map, gt['gt_sin_map'][0],
reduction='none') * center_mask) / paddle.sum(center_mask)
loss_cos = paddle.sum(
F.smooth_l1_loss(
pred_cos_map, gt['gt_cos_map'][0],
reduction='none') * center_mask) / paddle.sum(center_mask)
else:
loss_height = paddle.to_tensor(0.0)
loss_sin = paddle.to_tensor(0.0)
loss_cos = paddle.to_tensor(0.0)
loss_gcn = self.gcn_loss(gcn_data)
loss = loss_text + loss_center + loss_height + loss_sin + loss_cos + loss_gcn
results = dict(
loss=loss,
loss_text=loss_text,
loss_center=loss_center,
loss_height=loss_height,
loss_sin=loss_sin,
loss_cos=loss_cos,
loss_gcn=loss_gcn)
return results
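A small self-contained sketch of the balanced BCE term above, using random tensors only to illustrate the expected inputs (a probability map, a binary ground truth and an effective mask of identical shape). It assumes nothing beyond an installed Paddle; in the full forward pass, F.sigmoid is applied to the raw text-region logits before this method is called.

import paddle
from ppocr.losses.det_drrg_loss import DRRGLoss

loss_fn = DRRGLoss(ohem_ratio=3.0)
pred = paddle.uniform([1, 32, 32], min=0., max=1.)              # probability map
gt = paddle.cast(paddle.uniform([1, 32, 32]) > 0.7, 'float32')  # binary ground truth
mask = paddle.ones([1, 32, 32])                                 # effective mask
print(loss_fn.balance_bce_loss(pred, gt, mask))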
@@ -16,6 +16,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import paddle
from paddle import ParamAttr
import paddle.nn as nn
@@ -25,6 +27,8 @@ from paddle.vision.ops import DeformConv2D
from paddle.regularizer import L2Decay
from paddle.nn.initializer import Normal, Constant, XavierUniform
from ppocr.utils.logging import get_logger
__all__ = ["ResNet_vd", "ConvBNLayer", "DeformableConvV2"]
@@ -246,6 +250,7 @@ class ResNet_vd(nn.Layer):
layers=50,
dcn_stage=None,
out_indices=None,
pretrained_model=None,
**kwargs):
super(ResNet_vd, self).__init__()
@@ -339,6 +344,30 @@ class ResNet_vd(nn.Layer):
self.out_channels.append(num_filters[block])
self.stages.append(nn.Sequential(*block_list))
if pretrained_model is not None:
self.load_pretrained_params(pretrained_model)
def load_pretrained_params(self, path):
logger = get_logger()
if path.endswith('.pdparams'):
path = path.replace('.pdparams', '')
assert os.path.exists(path + ".pdparams"), \
"The {}.pdparams does not exists!".format(path)
params = paddle.load(path + '.pdparams')
state_dict = self.state_dict()
new_state_dict = {}
for k1, k2 in zip(state_dict.keys(), params.keys()):
if list(state_dict[k1].shape) == list(params[k2].shape):
new_state_dict[k1] = params[k2]
else:
logger.info(
f"The shape of model params {k1} {state_dict[k1].shape} not matched with loaded params {k2} {params[k2].shape} !"
)
self.set_state_dict(new_state_dict)
logger.info(f"loaded backbone pretrained_model successful from {path}")
def forward(self, inputs):
y = self.conv1_1(inputs)
y = self.conv1_2(y)
......
@@ -24,6 +24,7 @@ def build_head(config):
from .det_fce_head import FCEHead
from .e2e_pg_head import PGHead
from .det_ct_head import CT_Head
from .det_drrg_head import DRRGHead
# rec head
from .rec_ctc_head import CTCHead
@@ -53,7 +54,7 @@ def build_head(config):
'ClsHead', 'AttentionHead', 'SRNHead', 'PGHead', 'Transformer',
'TableAttentionHead', 'SARHead', 'AsterHead', 'SDMGRHead', 'PRENHead',
'MultiHead', 'ABINetHead', 'TableMasterHead', 'SPINAttentionHead',
'VLHead', 'SLAHead', 'RobustScannerHead', 'CT_Head', 'DRRGHead'
]
#table head
......
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textdet/dense_heads/drrg_head.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import warnings
import cv2
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from .gcn import GCN
from .local_graph import LocalGraphs
from .proposal_local_graph import ProposalLocalGraphs
class DRRGHead(nn.Layer):
def __init__(self,
in_channels,
k_at_hops=(8, 4),
num_adjacent_linkages=3,
node_geo_feat_len=120,
pooling_scale=1.0,
pooling_output_size=(4, 3),
nms_thr=0.3,
min_width=8.0,
max_width=24.0,
comp_shrink_ratio=1.03,
comp_ratio=0.4,
comp_score_thr=0.3,
text_region_thr=0.2,
center_region_thr=0.2,
center_region_area_thr=50,
local_graph_thr=0.7,
**kwargs):
super().__init__()
assert isinstance(in_channels, int)
assert isinstance(k_at_hops, tuple)
assert isinstance(num_adjacent_linkages, int)
assert isinstance(node_geo_feat_len, int)
assert isinstance(pooling_scale, float)
assert isinstance(pooling_output_size, tuple)
assert isinstance(comp_shrink_ratio, float)
assert isinstance(nms_thr, float)
assert isinstance(min_width, float)
assert isinstance(max_width, float)
assert isinstance(comp_ratio, float)
assert isinstance(comp_score_thr, float)
assert isinstance(text_region_thr, float)
assert isinstance(center_region_thr, float)
assert isinstance(center_region_area_thr, int)
assert isinstance(local_graph_thr, float)
self.in_channels = in_channels
self.out_channels = 6
self.downsample_ratio = 1.0
self.k_at_hops = k_at_hops
self.num_adjacent_linkages = num_adjacent_linkages
self.node_geo_feat_len = node_geo_feat_len
self.pooling_scale = pooling_scale
self.pooling_output_size = pooling_output_size
self.comp_shrink_ratio = comp_shrink_ratio
self.nms_thr = nms_thr
self.min_width = min_width
self.max_width = max_width
self.comp_ratio = comp_ratio
self.comp_score_thr = comp_score_thr
self.text_region_thr = text_region_thr
self.center_region_thr = center_region_thr
self.center_region_area_thr = center_region_area_thr
self.local_graph_thr = local_graph_thr
self.out_conv = nn.Conv2D(
in_channels=self.in_channels,
out_channels=self.out_channels,
kernel_size=1,
stride=1,
padding=0)
self.graph_train = LocalGraphs(
self.k_at_hops, self.num_adjacent_linkages, self.node_geo_feat_len,
self.pooling_scale, self.pooling_output_size, self.local_graph_thr)
self.graph_test = ProposalLocalGraphs(
self.k_at_hops, self.num_adjacent_linkages, self.node_geo_feat_len,
self.pooling_scale, self.pooling_output_size, self.nms_thr,
self.min_width, self.max_width, self.comp_shrink_ratio,
self.comp_ratio, self.comp_score_thr, self.text_region_thr,
self.center_region_thr, self.center_region_area_thr)
pool_w, pool_h = self.pooling_output_size
node_feat_len = (pool_w * pool_h) * (
self.in_channels + self.out_channels) + self.node_geo_feat_len
self.gcn = GCN(node_feat_len)
def forward(self, inputs, targets=None):
"""
Args:
inputs (Tensor): Shape of :math:`(N, C, H, W)`.
targets (list): In training mode, targets[7] holds the padded text
component attributes, with shape (num_component, 8).
Returns:
tuple: Returns (pred_maps, (gcn_pred, gt_labels)).
- | pred_maps (Tensor): Prediction map with shape
:math:`(N, C_{out}, H, W)`.
- | gcn_pred (Tensor): Prediction from GCN module, with
shape :math:`(N, 2)`.
- | gt_labels (Tensor): Ground-truth label with shape
:math:`(N, 8)`.
"""
if self.training:
assert targets is not None
gt_comp_attribs = targets[7]
pred_maps = self.out_conv(inputs)
feat_maps = paddle.concat([inputs, pred_maps], axis=1)
node_feats, adjacent_matrices, knn_inds, gt_labels = self.graph_train(
feat_maps, np.stack(gt_comp_attribs))
gcn_pred = self.gcn(node_feats, adjacent_matrices, knn_inds)
return pred_maps, (gcn_pred, gt_labels)
else:
return self.single_test(inputs)
def single_test(self, feat_maps):
r"""
Args:
feat_maps (Tensor): Shape of :math:`(N, C, H, W)`.
Returns:
tuple: Returns (edge, score, text_comps).
- | edge (ndarray): The edge array of shape :math:`(N, 2)`
where each row is a pair of text component indices
that makes up an edge in graph.
- | score (ndarray): The score array of shape :math:`(N,)`,
corresponding to the edge above.
- | text_comps (ndarray): The text components of shape
:math:`(N, 9)` where each row corresponds to one box and
its score: (x1, y1, x2, y2, x3, y3, x4, y4, score).
"""
pred_maps = self.out_conv(feat_maps)
feat_maps = paddle.concat([feat_maps, pred_maps], axis=1)
none_flag, graph_data = self.graph_test(pred_maps, feat_maps)
(local_graphs_node_feat, adjacent_matrices, pivots_knn_inds,
pivot_local_graphs, text_comps) = graph_data
if none_flag:
return None, None, None
gcn_pred = self.gcn(local_graphs_node_feat, adjacent_matrices,
pivots_knn_inds)
pred_labels = F.softmax(gcn_pred, axis=1)
edges = []
scores = []
pivot_local_graphs = pivot_local_graphs.squeeze().numpy()
for pivot_ind, pivot_local_graph in enumerate(pivot_local_graphs):
pivot = pivot_local_graph[0]
for k_ind, neighbor_ind in enumerate(pivots_knn_inds[pivot_ind]):
neighbor = pivot_local_graph[neighbor_ind.item()]
edges.append([pivot, neighbor])
scores.append(pred_labels[pivot_ind * pivots_knn_inds.shape[1] +
k_ind, 1].item())
edges = np.asarray(edges)
scores = np.asarray(scores)
return edges, scores, text_comps
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textdet/modules/gcn.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
class BatchNorm1D(nn.BatchNorm1D):
def __init__(self,
num_features,
eps=1e-05,
momentum=0.1,
affine=True,
track_running_stats=True):
momentum = 1 - momentum
weight_attr = None
bias_attr = None
if not affine:
weight_attr = paddle.ParamAttr(learning_rate=0.0)
bias_attr = paddle.ParamAttr(learning_rate=0.0)
super().__init__(
num_features,
momentum=momentum,
epsilon=eps,
weight_attr=weight_attr,
bias_attr=bias_attr,
use_global_stats=track_running_stats)
class MeanAggregator(nn.Layer):
def forward(self, features, A):
x = paddle.bmm(A, features)
return x
class GraphConv(nn.Layer):
def __init__(self, in_dim, out_dim):
super().__init__()
self.in_dim = in_dim
self.out_dim = out_dim
self.weight = self.create_parameter(
[in_dim * 2, out_dim],
default_initializer=nn.initializer.XavierUniform())
self.bias = self.create_parameter(
[out_dim],
is_bias=True,
default_initializer=nn.initializer.Assign([0] * out_dim))
self.aggregator = MeanAggregator()
def forward(self, features, A):
b, n, d = features.shape
assert d == self.in_dim
agg_feats = self.aggregator(features, A)
cat_feats = paddle.concat([features, agg_feats], axis=2)
out = paddle.einsum('bnd,df->bnf', cat_feats, self.weight)
out = F.relu(out + self.bias)
return out
class GCN(nn.Layer):
def __init__(self, feat_len):
super(GCN, self).__init__()
self.bn0 = BatchNorm1D(feat_len, affine=False)
self.conv1 = GraphConv(feat_len, 512)
self.conv2 = GraphConv(512, 256)
self.conv3 = GraphConv(256, 128)
self.conv4 = GraphConv(128, 64)
self.classifier = nn.Sequential(
nn.Linear(64, 32), nn.PReLU(32), nn.Linear(32, 2))
def forward(self, x, A, knn_inds):
num_local_graphs, num_max_nodes, feat_len = x.shape
x = x.reshape([-1, feat_len])
x = self.bn0(x)
x = x.reshape([num_local_graphs, num_max_nodes, feat_len])
x = self.conv1(x, A)
x = self.conv2(x, A)
x = self.conv3(x, A)
x = self.conv4(x, A)
k = knn_inds.shape[-1]
mid_feat_len = x.shape[-1]
edge_feat = paddle.zeros([num_local_graphs, k, mid_feat_len])
for graph_ind in range(num_local_graphs):
edge_feat[graph_ind, :, :] = x[graph_ind][paddle.to_tensor(knn_inds[
graph_ind])]
edge_feat = edge_feat.reshape([-1, mid_feat_len])
pred = self.classifier(edge_feat)
return pred
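A shape-only sketch of the GCN above with made-up sizes (in DRRGHead the node feature length is derived from the pooled RoI features plus the geometric embedding). For B local graphs with N nodes each, the module scores the k pivot-to-neighbour edges per graph and returns B * k two-class logits.

import paddle
from ppocr.modeling.heads.gcn import GCN

B, N, feat_len, k = 2, 16, 120, 8           # hypothetical sizes
x = paddle.rand([B, N, feat_len])           # node features
A = paddle.rand([B, N, N])                  # normalized adjacency matrices
knn_inds = paddle.randint(0, N, [B, k])     # pivot's k nearest neighbours
gcn = GCN(feat_len)
print(gcn(x, A, knn_inds).shape)            # -> [B * k, 2] edge logits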
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textdet/modules/local_graph.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
import paddle.nn as nn
from ppocr.ext_op import RoIAlignRotated
def normalize_adjacent_matrix(A):
assert A.ndim == 2
assert A.shape[0] == A.shape[1]
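# Add self-loops and symmetrically normalize the adjacency matrix,
# G = D^(-1/2) (A + I) D^(-1/2), the standard GCN propagation matrix.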
A = A + np.eye(A.shape[0])
d = np.sum(A, axis=0)
d = np.clip(d, 0, None)
d_inv = np.power(d, -0.5).flatten()
d_inv[np.isinf(d_inv)] = 0.0
d_inv = np.diag(d_inv)
G = A.dot(d_inv).transpose().dot(d_inv)
return G
def euclidean_distance_matrix(A, B):
"""Calculate the Euclidean distance matrix.
Args:
A (ndarray): The point sequence.
B (ndarray): The point sequence with the same dimensions as A.
Returns:
D (ndarray): The Euclidean distance matrix.
"""
assert A.ndim == 2
assert B.ndim == 2
assert A.shape[1] == B.shape[1]
m = A.shape[0]
n = B.shape[0]
A_dots = (A * A).sum(axis=1).reshape((m, 1)) * np.ones(shape=(1, n))
B_dots = (B * B).sum(axis=1) * np.ones(shape=(m, 1))
D_squared = A_dots + B_dots - 2 * A.dot(B.T)
zero_mask = np.less(D_squared, 0.0)
D_squared[zero_mask] = 0.0
D = np.sqrt(D_squared)
return D
def feature_embedding(input_feats, out_feat_len):
"""Embed features. This code was partially adapted from
https://github.com/GXYM/DRRG licensed under the MIT license.
Args:
input_feats (ndarray): The input features of shape (N, d), where N is
the number of nodes in graph, d is the input feature vector length.
out_feat_len (int): The length of output feature vector.
Returns:
embedded_feats (ndarray): The embedded features.
"""
assert input_feats.ndim == 2
assert isinstance(out_feat_len, int)
assert out_feat_len >= input_feats.shape[1]
num_nodes = input_feats.shape[0]
feat_dim = input_feats.shape[1]
feat_repeat_times = out_feat_len // feat_dim
residue_dim = out_feat_len % feat_dim
if residue_dim > 0:
embed_wave = np.array([
np.power(1000, 2.0 * (j // 2) / feat_repeat_times + 1)
for j in range(feat_repeat_times + 1)
]).reshape((feat_repeat_times + 1, 1, 1))
repeat_feats = np.repeat(
np.expand_dims(
input_feats, axis=0), feat_repeat_times, axis=0)
residue_feats = np.hstack([
input_feats[:, 0:residue_dim], np.zeros(
(num_nodes, feat_dim - residue_dim))
])
residue_feats = np.expand_dims(residue_feats, axis=0)
repeat_feats = np.concatenate([repeat_feats, residue_feats], axis=0)
embedded_feats = repeat_feats / embed_wave
embedded_feats[:, 0::2] = np.sin(embedded_feats[:, 0::2])
embedded_feats[:, 1::2] = np.cos(embedded_feats[:, 1::2])
embedded_feats = np.transpose(embedded_feats, (1, 0, 2)).reshape(
(num_nodes, -1))[:, 0:out_feat_len]
else:
embed_wave = np.array([
np.power(1000, 2.0 * (j // 2) / feat_repeat_times)
for j in range(feat_repeat_times)
]).reshape((feat_repeat_times, 1, 1))
repeat_feats = np.repeat(
np.expand_dims(
input_feats, axis=0), feat_repeat_times, axis=0)
embedded_feats = repeat_feats / embed_wave
embedded_feats[:, 0::2] = np.sin(embedded_feats[:, 0::2])
embedded_feats[:, 1::2] = np.cos(embedded_feats[:, 1::2])
embedded_feats = np.transpose(embedded_feats, (1, 0, 2)).reshape(
(num_nodes, -1)).astype(np.float32)
return embedded_feats
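# Illustrative sketch (not part of the original file): the geometric attributes
# are expanded into a fixed-length sinusoidal embedding, in the spirit of
# Transformer positional encoding, so nodes with few raw attributes still share
# a common feature length. For instance, 6-dim attributes embedded to 120 dims:
#   feature_embedding(np.random.rand(4, 6).astype(np.float32), 120).shape
#   # -> (4, 120)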
class LocalGraphs:
def __init__(self, k_at_hops, num_adjacent_linkages, node_geo_feat_len,
pooling_scale, pooling_output_size, local_graph_thr):
assert len(k_at_hops) == 2
assert all(isinstance(n, int) for n in k_at_hops)
assert isinstance(num_adjacent_linkages, int)
assert isinstance(node_geo_feat_len, int)
assert isinstance(pooling_scale, float)
assert all(isinstance(n, int) for n in pooling_output_size)
assert isinstance(local_graph_thr, float)
self.k_at_hops = k_at_hops
self.num_adjacent_linkages = num_adjacent_linkages
self.node_geo_feat_dim = node_geo_feat_len
self.pooling = RoIAlignRotated(pooling_output_size, pooling_scale)
self.local_graph_thr = local_graph_thr
def generate_local_graphs(self, sorted_dist_inds, gt_comp_labels):
"""Generate local graphs for GCN to predict which instance a text
component belongs to.
Args:
            sorted_dist_inds (ndarray): The complete graph node indices, sorted
                according to the Euclidean distance.
            gt_comp_labels (ndarray): The ground-truth labels defining the
                instance to which the text components (nodes in graphs) belong.
Returns:
pivot_local_graphs(list[list[int]]): The list of local graph
neighbor indices of pivots.
pivot_knns(list[list[int]]): The list of k-nearest neighbor indices
of pivots.
"""
assert sorted_dist_inds.ndim == 2
assert (sorted_dist_inds.shape[0] == sorted_dist_inds.shape[1] ==
gt_comp_labels.shape[0])
knn_graph = sorted_dist_inds[:, 1:self.k_at_hops[0] + 1]
pivot_local_graphs = []
pivot_knns = []
for pivot_ind, knn in enumerate(knn_graph):
local_graph_neighbors = set(knn)
for neighbor_ind in knn:
local_graph_neighbors.update(
set(sorted_dist_inds[neighbor_ind, 1:self.k_at_hops[1] +
1]))
local_graph_neighbors.discard(pivot_ind)
pivot_local_graph = list(local_graph_neighbors)
pivot_local_graph.insert(0, pivot_ind)
pivot_knn = [pivot_ind] + list(knn)
if pivot_ind < 1:
pivot_local_graphs.append(pivot_local_graph)
pivot_knns.append(pivot_knn)
else:
add_flag = True
for graph_ind, added_knn in enumerate(pivot_knns):
added_pivot_ind = added_knn[0]
added_local_graph = pivot_local_graphs[graph_ind]
union = len(
set(pivot_local_graph[1:]).union(
set(added_local_graph[1:])))
intersect = len(
set(pivot_local_graph[1:]).intersection(
set(added_local_graph[1:])))
local_graph_iou = intersect / (union + 1e-8)
if (local_graph_iou > self.local_graph_thr and
pivot_ind in added_knn and
gt_comp_labels[added_pivot_ind] ==
gt_comp_labels[pivot_ind] and
gt_comp_labels[pivot_ind] != 0):
add_flag = False
break
if add_flag:
pivot_local_graphs.append(pivot_local_graph)
pivot_knns.append(pivot_knn)
return pivot_local_graphs, pivot_knns
def generate_gcn_input(self, node_feat_batch, node_label_batch,
local_graph_batch, knn_batch, sorted_dist_ind_batch):
"""Generate graph convolution network input data.
Args:
node_feat_batch (List[Tensor]): The batched graph node features.
node_label_batch (List[ndarray]): The batched text component
labels.
local_graph_batch (List[List[list[int]]]): The local graph node
indices of image batch.
knn_batch (List[List[list[int]]]): The knn graph node indices of
image batch.
sorted_dist_ind_batch (list[ndarray]): The node indices sorted
according to the Euclidean distance.
Returns:
local_graphs_node_feat (Tensor): The node features of graph.
adjacent_matrices (Tensor): The adjacent matrices of local graphs.
pivots_knn_inds (Tensor): The k-nearest neighbor indices in
local graph.
            gt_linkage (Tensor): The supervision signal of the GCN for linkage
                prediction.
"""
assert isinstance(node_feat_batch, list)
assert isinstance(node_label_batch, list)
assert isinstance(local_graph_batch, list)
assert isinstance(knn_batch, list)
assert isinstance(sorted_dist_ind_batch, list)
num_max_nodes = max([
len(pivot_local_graph)
for pivot_local_graphs in local_graph_batch
for pivot_local_graph in pivot_local_graphs
])
local_graphs_node_feat = []
adjacent_matrices = []
pivots_knn_inds = []
pivots_gt_linkage = []
for batch_ind, sorted_dist_inds in enumerate(sorted_dist_ind_batch):
node_feats = node_feat_batch[batch_ind]
pivot_local_graphs = local_graph_batch[batch_ind]
pivot_knns = knn_batch[batch_ind]
node_labels = node_label_batch[batch_ind]
for graph_ind, pivot_knn in enumerate(pivot_knns):
pivot_local_graph = pivot_local_graphs[graph_ind]
num_nodes = len(pivot_local_graph)
pivot_ind = pivot_local_graph[0]
node2ind_map = {j: i for i, j in enumerate(pivot_local_graph)}
knn_inds = paddle.to_tensor(
[node2ind_map[i] for i in pivot_knn[1:]])
pivot_feats = node_feats[pivot_ind]
normalized_feats = node_feats[paddle.to_tensor(
pivot_local_graph)] - pivot_feats
adjacent_matrix = np.zeros(
(num_nodes, num_nodes), dtype=np.float32)
for node in pivot_local_graph:
neighbors = sorted_dist_inds[node, 1:
self.num_adjacent_linkages + 1]
for neighbor in neighbors:
if neighbor in pivot_local_graph:
adjacent_matrix[node2ind_map[node], node2ind_map[
neighbor]] = 1
adjacent_matrix[node2ind_map[neighbor],
node2ind_map[node]] = 1
adjacent_matrix = normalize_adjacent_matrix(adjacent_matrix)
pad_adjacent_matrix = paddle.zeros(
(num_max_nodes, num_max_nodes))
pad_adjacent_matrix[:num_nodes, :num_nodes] = paddle.cast(
paddle.to_tensor(adjacent_matrix), 'float32')
pad_normalized_feats = paddle.concat(
[
normalized_feats, paddle.zeros(
(num_max_nodes - num_nodes,
normalized_feats.shape[1]))
],
axis=0)
local_graph_labels = node_labels[pivot_local_graph]
knn_labels = local_graph_labels[knn_inds.numpy()]
link_labels = ((node_labels[pivot_ind] == knn_labels) &
(node_labels[pivot_ind] > 0)).astype(np.int64)
link_labels = paddle.to_tensor(link_labels)
local_graphs_node_feat.append(pad_normalized_feats)
adjacent_matrices.append(pad_adjacent_matrix)
pivots_knn_inds.append(knn_inds)
pivots_gt_linkage.append(link_labels)
local_graphs_node_feat = paddle.stack(local_graphs_node_feat, 0)
adjacent_matrices = paddle.stack(adjacent_matrices, 0)
pivots_knn_inds = paddle.stack(pivots_knn_inds, 0)
pivots_gt_linkage = paddle.stack(pivots_gt_linkage, 0)
return (local_graphs_node_feat, adjacent_matrices, pivots_knn_inds,
pivots_gt_linkage)
def __call__(self, feat_maps, comp_attribs):
"""Generate local graphs as GCN input.
Args:
feat_maps (Tensor): The feature maps to extract the content
features of text components.
comp_attribs (ndarray): The text component attributes.
Returns:
local_graphs_node_feat (Tensor): The node features of graph.
adjacent_matrices (Tensor): The adjacent matrices of local graphs.
pivots_knn_inds (Tensor): The k-nearest neighbor indices in local
graph.
            gt_linkage (Tensor): The supervision signal of the GCN for linkage
                prediction.
"""
assert isinstance(feat_maps, paddle.Tensor)
assert comp_attribs.ndim == 3
assert comp_attribs.shape[2] == 8
sorted_dist_inds_batch = []
local_graph_batch = []
knn_batch = []
node_feat_batch = []
node_label_batch = []
for batch_ind in range(comp_attribs.shape[0]):
num_comps = int(comp_attribs[batch_ind, 0, 0])
comp_geo_attribs = comp_attribs[batch_ind, :num_comps, 1:7]
node_labels = comp_attribs[batch_ind, :num_comps, 7].astype(
np.int32)
comp_centers = comp_geo_attribs[:, 0:2]
distance_matrix = euclidean_distance_matrix(comp_centers,
comp_centers)
batch_id = np.zeros(
(comp_geo_attribs.shape[0], 1), dtype=np.float32) * batch_ind
comp_geo_attribs[:, -2] = np.clip(comp_geo_attribs[:, -2], -1, 1)
angle = np.arccos(comp_geo_attribs[:, -2]) * np.sign(
comp_geo_attribs[:, -1])
angle = angle.reshape((-1, 1))
rotated_rois = np.hstack(
[batch_id, comp_geo_attribs[:, :-2], angle])
rois = paddle.to_tensor(rotated_rois)
content_feats = self.pooling(feat_maps[batch_ind].unsqueeze(0),
rois)
content_feats = content_feats.reshape([content_feats.shape[0], -1])
geo_feats = feature_embedding(comp_geo_attribs,
self.node_geo_feat_dim)
geo_feats = paddle.to_tensor(geo_feats)
node_feats = paddle.concat([content_feats, geo_feats], axis=-1)
sorted_dist_inds = np.argsort(distance_matrix, axis=1)
pivot_local_graphs, pivot_knns = self.generate_local_graphs(
sorted_dist_inds, node_labels)
node_feat_batch.append(node_feats)
node_label_batch.append(node_labels)
local_graph_batch.append(pivot_local_graphs)
knn_batch.append(pivot_knns)
sorted_dist_inds_batch.append(sorted_dist_inds)
(node_feats, adjacent_matrices, knn_inds, gt_linkage) = \
self.generate_gcn_input(node_feat_batch,
node_label_batch,
local_graph_batch,
knn_batch,
sorted_dist_inds_batch)
return node_feats, adjacent_matrices, knn_inds, gt_linkage
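# Illustrative sketch (not part of the original file): LocalGraphs is driven by
# the DRRG head during training. The values below only mirror the commonly used
# DRRG defaults and require the compiled RoIAlignRotated custom op; adapt them
# to your own configuration.
#   local_graphs = LocalGraphs(
#       k_at_hops=(8, 4),            # neighbours collected at the 1st/2nd hop
#       num_adjacent_linkages=3,     # edges kept per node in the adjacency
#       node_geo_feat_len=120,       # sinusoidal geometry embedding length
#       pooling_scale=1.0,
#       pooling_output_size=(4, 3),  # rotated RoI-Align output size (h, w)
#       local_graph_thr=0.7)         # IoU threshold for dropping near-duplicate graphs
#   # node_feats, adj, knn_inds, gt_linkage = local_graphs(feat_maps, gt_comp_attribs)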
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textdet/modules/proposal_local_graph.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from lanms import merge_quadrangle_n9 as la_nms
from ppocr.ext_op import RoIAlignRotated
from .local_graph import (euclidean_distance_matrix, feature_embedding,
normalize_adjacent_matrix)
def fill_hole(input_mask):
h, w = input_mask.shape
canvas = np.zeros((h + 2, w + 2), np.uint8)
canvas[1:h + 1, 1:w + 1] = input_mask.copy()
mask = np.zeros((h + 4, w + 4), np.uint8)
cv2.floodFill(canvas, mask, (0, 0), 1)
    canvas = canvas[1:h + 1, 1:w + 1].astype(bool)
return ~canvas | input_mask
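# Illustrative sketch (not part of the original file): flood-filling from the
# border marks everything reachable from outside, so inverting that mask
# recovers enclosed holes and ORs them back into the input:
#   m = np.zeros((5, 5), np.uint8); m[1:4, 1:4] = 1; m[2, 2] = 0
#   bool(fill_hole(m)[2, 2])
#   # -> True (the one-pixel hole is filled)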
class ProposalLocalGraphs:
def __init__(self, k_at_hops, num_adjacent_linkages, node_geo_feat_len,
pooling_scale, pooling_output_size, nms_thr, min_width,
max_width, comp_shrink_ratio, comp_w_h_ratio, comp_score_thr,
text_region_thr, center_region_thr, center_region_area_thr):
assert len(k_at_hops) == 2
assert isinstance(k_at_hops, tuple)
assert isinstance(num_adjacent_linkages, int)
assert isinstance(node_geo_feat_len, int)
assert isinstance(pooling_scale, float)
assert isinstance(pooling_output_size, tuple)
assert isinstance(nms_thr, float)
assert isinstance(min_width, float)
assert isinstance(max_width, float)
assert isinstance(comp_shrink_ratio, float)
assert isinstance(comp_w_h_ratio, float)
assert isinstance(comp_score_thr, float)
assert isinstance(text_region_thr, float)
assert isinstance(center_region_thr, float)
assert isinstance(center_region_area_thr, int)
self.k_at_hops = k_at_hops
self.active_connection = num_adjacent_linkages
self.local_graph_depth = len(self.k_at_hops)
self.node_geo_feat_dim = node_geo_feat_len
self.pooling = RoIAlignRotated(pooling_output_size, pooling_scale)
self.nms_thr = nms_thr
self.min_width = min_width
self.max_width = max_width
self.comp_shrink_ratio = comp_shrink_ratio
self.comp_w_h_ratio = comp_w_h_ratio
self.comp_score_thr = comp_score_thr
self.text_region_thr = text_region_thr
self.center_region_thr = center_region_thr
self.center_region_area_thr = center_region_area_thr
def propose_comps(self, score_map, top_height_map, bot_height_map, sin_map,
cos_map, comp_score_thr, min_width, max_width,
comp_shrink_ratio, comp_w_h_ratio):
"""Propose text components.
Args:
score_map (ndarray): The score map for NMS.
top_height_map (ndarray): The predicted text height map from each
pixel in text center region to top sideline.
bot_height_map (ndarray): The predicted text height map from each
pixel in text center region to bottom sideline.
sin_map (ndarray): The predicted sin(theta) map.
cos_map (ndarray): The predicted cos(theta) map.
comp_score_thr (float): The score threshold of text component.
min_width (float): The minimum width of text components.
max_width (float): The maximum width of text components.
comp_shrink_ratio (float): The shrink ratio of text components.
comp_w_h_ratio (float): The width to height ratio of text
components.
Returns:
text_comps (ndarray): The text components.
"""
comp_centers = np.argwhere(score_map > comp_score_thr)
comp_centers = comp_centers[np.argsort(comp_centers[:, 0])]
y = comp_centers[:, 0]
x = comp_centers[:, 1]
top_height = top_height_map[y, x].reshape((-1, 1)) * comp_shrink_ratio
bot_height = bot_height_map[y, x].reshape((-1, 1)) * comp_shrink_ratio
sin = sin_map[y, x].reshape((-1, 1))
cos = cos_map[y, x].reshape((-1, 1))
top_mid_pts = comp_centers + np.hstack(
[top_height * sin, top_height * cos])
bot_mid_pts = comp_centers - np.hstack(
[bot_height * sin, bot_height * cos])
width = (top_height + bot_height) * comp_w_h_ratio
width = np.clip(width, min_width, max_width)
r = width / 2
tl = top_mid_pts[:, ::-1] - np.hstack([-r * sin, r * cos])
tr = top_mid_pts[:, ::-1] + np.hstack([-r * sin, r * cos])
br = bot_mid_pts[:, ::-1] + np.hstack([-r * sin, r * cos])
bl = bot_mid_pts[:, ::-1] - np.hstack([-r * sin, r * cos])
text_comps = np.hstack([tl, tr, br, bl]).astype(np.float32)
score = score_map[y, x].reshape((-1, 1))
text_comps = np.hstack([text_comps, score])
return text_comps
def propose_comps_and_attribs(self, text_region_map, center_region_map,
top_height_map, bot_height_map, sin_map,
cos_map):
"""Generate text components and attributes.
Args:
text_region_map (ndarray): The predicted text region probability
map.
center_region_map (ndarray): The predicted text center region
probability map.
top_height_map (ndarray): The predicted text height map from each
pixel in text center region to top sideline.
bot_height_map (ndarray): The predicted text height map from each
pixel in text center region to bottom sideline.
sin_map (ndarray): The predicted sin(theta) map.
cos_map (ndarray): The predicted cos(theta) map.
Returns:
comp_attribs (ndarray): The text component attributes.
text_comps (ndarray): The text components.
"""
assert (text_region_map.shape == center_region_map.shape ==
top_height_map.shape == bot_height_map.shape == sin_map.shape ==
cos_map.shape)
text_mask = text_region_map > self.text_region_thr
center_region_mask = (
center_region_map > self.center_region_thr) * text_mask
scale = np.sqrt(1.0 / (sin_map**2 + cos_map**2 + 1e-8))
sin_map, cos_map = sin_map * scale, cos_map * scale
center_region_mask = fill_hole(center_region_mask)
center_region_contours, _ = cv2.findContours(
center_region_mask.astype(np.uint8), cv2.RETR_TREE,
cv2.CHAIN_APPROX_SIMPLE)
mask_sz = center_region_map.shape
comp_list = []
for contour in center_region_contours:
current_center_mask = np.zeros(mask_sz)
cv2.drawContours(current_center_mask, [contour], -1, 1, -1)
if current_center_mask.sum() <= self.center_region_area_thr:
continue
score_map = text_region_map * current_center_mask
text_comps = self.propose_comps(
score_map, top_height_map, bot_height_map, sin_map, cos_map,
self.comp_score_thr, self.min_width, self.max_width,
self.comp_shrink_ratio, self.comp_w_h_ratio)
text_comps = la_nms(text_comps, self.nms_thr)
text_comp_mask = np.zeros(mask_sz)
text_comp_boxes = text_comps[:, :8].reshape(
(-1, 4, 2)).astype(np.int32)
cv2.drawContours(text_comp_mask, text_comp_boxes, -1, 1, -1)
if (text_comp_mask * text_mask).sum() < text_comp_mask.sum() * 0.5:
continue
if text_comps.shape[-1] > 0:
comp_list.append(text_comps)
if len(comp_list) <= 0:
return None, None
text_comps = np.vstack(comp_list)
text_comp_boxes = text_comps[:, :8].reshape((-1, 4, 2))
centers = np.mean(text_comp_boxes, axis=1).astype(np.int32)
x = centers[:, 0]
y = centers[:, 1]
scores = []
for text_comp_box in text_comp_boxes:
text_comp_box[:, 0] = np.clip(text_comp_box[:, 0], 0,
mask_sz[1] - 1)
text_comp_box[:, 1] = np.clip(text_comp_box[:, 1], 0,
mask_sz[0] - 1)
min_coord = np.min(text_comp_box, axis=0).astype(np.int32)
max_coord = np.max(text_comp_box, axis=0).astype(np.int32)
text_comp_box = text_comp_box - min_coord
box_sz = (max_coord - min_coord + 1)
temp_comp_mask = np.zeros((box_sz[1], box_sz[0]), dtype=np.uint8)
cv2.fillPoly(temp_comp_mask, [text_comp_box.astype(np.int32)], 1)
temp_region_patch = text_region_map[min_coord[1]:(max_coord[1] + 1),
min_coord[0]:(max_coord[0] + 1)]
score = cv2.mean(temp_region_patch, temp_comp_mask)[0]
scores.append(score)
scores = np.array(scores).reshape((-1, 1))
text_comps = np.hstack([text_comps[:, :-1], scores])
h = top_height_map[y, x].reshape(
(-1, 1)) + bot_height_map[y, x].reshape((-1, 1))
w = np.clip(h * self.comp_w_h_ratio, self.min_width, self.max_width)
sin = sin_map[y, x].reshape((-1, 1))
cos = cos_map[y, x].reshape((-1, 1))
x = x.reshape((-1, 1))
y = y.reshape((-1, 1))
comp_attribs = np.hstack([x, y, h, w, cos, sin])
return comp_attribs, text_comps
def generate_local_graphs(self, sorted_dist_inds, node_feats):
"""Generate local graphs and graph convolution network input data.
Args:
sorted_dist_inds (ndarray): The node indices sorted according to
the Euclidean distance.
node_feats (tensor): The features of nodes in graph.
Returns:
local_graphs_node_feats (tensor): The features of nodes in local
graphs.
adjacent_matrices (tensor): The adjacent matrices.
pivots_knn_inds (tensor): The k-nearest neighbor indices in
local graphs.
pivots_local_graphs (tensor): The indices of nodes in local
graphs.
"""
assert sorted_dist_inds.ndim == 2
assert (sorted_dist_inds.shape[0] == sorted_dist_inds.shape[1] ==
node_feats.shape[0])
knn_graph = sorted_dist_inds[:, 1:self.k_at_hops[0] + 1]
pivot_local_graphs = []
pivot_knns = []
for pivot_ind, knn in enumerate(knn_graph):
local_graph_neighbors = set(knn)
for neighbor_ind in knn:
local_graph_neighbors.update(
set(sorted_dist_inds[neighbor_ind, 1:self.k_at_hops[1] +
1]))
local_graph_neighbors.discard(pivot_ind)
pivot_local_graph = list(local_graph_neighbors)
pivot_local_graph.insert(0, pivot_ind)
pivot_knn = [pivot_ind] + list(knn)
pivot_local_graphs.append(pivot_local_graph)
pivot_knns.append(pivot_knn)
num_max_nodes = max([
len(pivot_local_graph) for pivot_local_graph in pivot_local_graphs
])
local_graphs_node_feat = []
adjacent_matrices = []
pivots_knn_inds = []
pivots_local_graphs = []
for graph_ind, pivot_knn in enumerate(pivot_knns):
pivot_local_graph = pivot_local_graphs[graph_ind]
num_nodes = len(pivot_local_graph)
pivot_ind = pivot_local_graph[0]
node2ind_map = {j: i for i, j in enumerate(pivot_local_graph)}
knn_inds = paddle.cast(
paddle.to_tensor([node2ind_map[i]
for i in pivot_knn[1:]]), 'int64')
pivot_feats = node_feats[pivot_ind]
normalized_feats = node_feats[paddle.to_tensor(
pivot_local_graph)] - pivot_feats
adjacent_matrix = np.zeros((num_nodes, num_nodes), dtype=np.float32)
for node in pivot_local_graph:
neighbors = sorted_dist_inds[node, 1:self.active_connection + 1]
for neighbor in neighbors:
if neighbor in pivot_local_graph:
adjacent_matrix[node2ind_map[node], node2ind_map[
neighbor]] = 1
adjacent_matrix[node2ind_map[neighbor], node2ind_map[
node]] = 1
adjacent_matrix = normalize_adjacent_matrix(adjacent_matrix)
pad_adjacent_matrix = paddle.zeros((num_max_nodes, num_max_nodes), )
pad_adjacent_matrix[:num_nodes, :num_nodes] = paddle.cast(
paddle.to_tensor(adjacent_matrix), 'float32')
pad_normalized_feats = paddle.concat(
[
normalized_feats, paddle.zeros(
(num_max_nodes - num_nodes, normalized_feats.shape[1]),
)
],
axis=0)
local_graph_nodes = paddle.to_tensor(pivot_local_graph)
local_graph_nodes = paddle.concat(
[
local_graph_nodes, paddle.zeros(
[num_max_nodes - num_nodes], dtype='int64')
],
axis=-1)
local_graphs_node_feat.append(pad_normalized_feats)
adjacent_matrices.append(pad_adjacent_matrix)
pivots_knn_inds.append(knn_inds)
pivots_local_graphs.append(local_graph_nodes)
local_graphs_node_feat = paddle.stack(local_graphs_node_feat, 0)
adjacent_matrices = paddle.stack(adjacent_matrices, 0)
pivots_knn_inds = paddle.stack(pivots_knn_inds, 0)
pivots_local_graphs = paddle.stack(pivots_local_graphs, 0)
return (local_graphs_node_feat, adjacent_matrices, pivots_knn_inds,
pivots_local_graphs)
def __call__(self, preds, feat_maps):
"""Generate local graphs and graph convolutional network input data.
Args:
preds (tensor): The predicted maps.
feat_maps (tensor): The feature maps to extract content feature of
text components.
Returns:
none_flag (bool): The flag showing whether the number of proposed
text components is 0.
local_graphs_node_feats (tensor): The features of nodes in local
graphs.
adjacent_matrices (tensor): The adjacent matrices.
pivots_knn_inds (tensor): The k-nearest neighbor indices in
local graphs.
pivots_local_graphs (tensor): The indices of nodes in local
graphs.
text_comps (ndarray): The predicted text components.
"""
if preds.ndim == 4:
assert preds.shape[0] == 1
preds = paddle.squeeze(preds)
pred_text_region = F.sigmoid(preds[0]).numpy()
pred_center_region = F.sigmoid(preds[1]).numpy()
pred_sin_map = preds[2].numpy()
pred_cos_map = preds[3].numpy()
pred_top_height_map = preds[4].numpy()
pred_bot_height_map = preds[5].numpy()
comp_attribs, text_comps = self.propose_comps_and_attribs(
pred_text_region, pred_center_region, pred_top_height_map,
pred_bot_height_map, pred_sin_map, pred_cos_map)
if comp_attribs is None or len(comp_attribs) < 2:
none_flag = True
return none_flag, (0, 0, 0, 0, 0)
comp_centers = comp_attribs[:, 0:2]
distance_matrix = euclidean_distance_matrix(comp_centers, comp_centers)
geo_feats = feature_embedding(comp_attribs, self.node_geo_feat_dim)
geo_feats = paddle.to_tensor(geo_feats)
batch_id = np.zeros((comp_attribs.shape[0], 1), dtype=np.float32)
comp_attribs = comp_attribs.astype(np.float32)
angle = np.arccos(comp_attribs[:, -2]) * np.sign(comp_attribs[:, -1])
angle = angle.reshape((-1, 1))
rotated_rois = np.hstack([batch_id, comp_attribs[:, :-2], angle])
rois = paddle.to_tensor(rotated_rois)
content_feats = self.pooling(feat_maps, rois)
content_feats = content_feats.reshape([content_feats.shape[0], -1])
node_feats = paddle.concat([content_feats, geo_feats], axis=-1)
sorted_dist_inds = np.argsort(distance_matrix, axis=1)
(local_graphs_node_feat, adjacent_matrices, pivots_knn_inds,
pivots_local_graphs) = self.generate_local_graphs(sorted_dist_inds,
node_feats)
none_flag = False
return none_flag, (local_graphs_node_feat, adjacent_matrices,
pivots_knn_inds, pivots_local_graphs, text_comps)
...@@ -27,9 +27,11 @@ def build_neck(config):
    from .pren_fpn import PRENFPN
    from .csp_pan import CSPPAN
    from .ct_fpn import CTFPN
    from .fpn_unet import FPN_UNet
    support_dict = [
        'FPN', 'FCEFPN', 'LKPAN', 'DBFPN', 'RSEFPN', 'EASTFPN', 'SASTFPN',
        'SequenceEncoder', 'PGFPN', 'TableFPN', 'PRENFPN', 'CSPPAN', 'CTFPN',
        'FPN_UNet'
    ]
    module_name = config.pop('name')
...
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textdet/necks/fpn_unet.py
"""
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
class UpBlock(nn.Layer):
def __init__(self, in_channels, out_channels):
super().__init__()
assert isinstance(in_channels, int)
assert isinstance(out_channels, int)
self.conv1x1 = nn.Conv2D(
in_channels, in_channels, kernel_size=1, stride=1, padding=0)
self.conv3x3 = nn.Conv2D(
in_channels, out_channels, kernel_size=3, stride=1, padding=1)
self.deconv = nn.Conv2DTranspose(
out_channels, out_channels, kernel_size=4, stride=2, padding=1)
def forward(self, x):
x = F.relu(self.conv1x1(x))
x = F.relu(self.conv3x3(x))
x = self.deconv(x)
return x
class FPN_UNet(nn.Layer):
def __init__(self, in_channels, out_channels):
super().__init__()
assert len(in_channels) == 4
assert isinstance(out_channels, int)
self.out_channels = out_channels
blocks_out_channels = [out_channels] + [
min(out_channels * 2**i, 256) for i in range(4)
]
blocks_in_channels = [blocks_out_channels[1]] + [
in_channels[i] + blocks_out_channels[i + 2] for i in range(3)
] + [in_channels[3]]
self.up4 = nn.Conv2DTranspose(
blocks_in_channels[4],
blocks_out_channels[4],
kernel_size=4,
stride=2,
padding=1)
self.up_block3 = UpBlock(blocks_in_channels[3], blocks_out_channels[3])
self.up_block2 = UpBlock(blocks_in_channels[2], blocks_out_channels[2])
self.up_block1 = UpBlock(blocks_in_channels[1], blocks_out_channels[1])
self.up_block0 = UpBlock(blocks_in_channels[0], blocks_out_channels[0])
def forward(self, x):
"""
Args:
x (list[Tensor] | tuple[Tensor]): A list of four tensors of shape
:math:`(N, C_i, H_i, W_i)`, representing C2, C3, C4, C5
                features, respectively. :math:`C_i` should match the numbers in
``in_channels``.
Returns:
Tensor: Shape :math:`(N, C, H, W)` where :math:`H=4H_0` and
:math:`W=4W_0`.
"""
c2, c3, c4, c5 = x
x = F.relu(self.up4(c5))
x = paddle.concat([x, c4], axis=1)
x = F.relu(self.up_block3(x))
x = paddle.concat([x, c3], axis=1)
x = F.relu(self.up_block2(x))
x = paddle.concat([x, c2], axis=1)
x = F.relu(self.up_block1(x))
x = self.up_block0(x)
return x
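# Illustrative sketch (not part of the original file): with
# in_channels=[256, 512, 1024, 2048] and out_channels=32 (as in the DRRG
# CTW1500 config) and a 640x640 input, C2..C5 arrive at strides 4/8/16/32 and
# the neck restores full resolution:
#   neck = FPN_UNet(in_channels=[256, 512, 1024, 2048], out_channels=32)
#   feats = [paddle.rand([1, c, 640 // s, 640 // s])
#            for c, s in zip([256, 512, 1024, 2048], [4, 8, 16, 32])]
#   neck(feats).shape
#   # -> [1, 32, 640, 640]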
...@@ -36,6 +36,7 @@ from .vqa_token_re_layoutlm_postprocess import VQAReTokenLayoutLMPostProcess, Di
from .table_postprocess import TableMasterLabelDecode, TableLabelDecode
from .picodet_postprocess import PicoDetPostProcess
from .ct_postprocess import CTPostProcess
from .drrg_postprocess import DRRGPostprocess
def build_post_process(config, global_config=None):
...@@ -49,7 +50,8 @@ def build_post_process(config, global_config=None):
        'DistillationSARLabelDecode', 'ViTSTRLabelDecode', 'ABINetLabelDecode',
        'TableMasterLabelDecode', 'SPINLabelDecode',
        'DistillationSerPostProcess', 'DistillationRePostProcess',
        'VLLabelDecode', 'PicoDetPostProcess', 'CTPostProcess',
        'DRRGPostprocess'
    ]
    if config['name'] == 'PSEPostProcess':
...
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textdet/postprocess/drrg_postprocessor.py
"""
import functools
import operator
import numpy as np
import paddle
from numpy.linalg import norm
import cv2
class Node:
def __init__(self, ind):
self.__ind = ind
self.__links = set()
@property
def ind(self):
return self.__ind
@property
def links(self):
return set(self.__links)
def add_link(self, link_node):
self.__links.add(link_node)
link_node.__links.add(self)
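# Illustrative sketch (not part of the original file): links are stored
# symmetrically, so registering a link on one endpoint also updates the other.
#   a, b = Node(0), Node(1)
#   a.add_link(b)
#   {n.ind for n in a.links}, {n.ind for n in b.links}
#   # -> ({1}, {0})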
def graph_propagation(edges, scores, text_comps, edge_len_thr=50.):
assert edges.ndim == 2
assert edges.shape[1] == 2
assert edges.shape[0] == scores.shape[0]
assert text_comps.ndim == 2
assert isinstance(edge_len_thr, float)
edges = np.sort(edges, axis=1)
score_dict = {}
for i, edge in enumerate(edges):
if text_comps is not None:
box1 = text_comps[edge[0], :8].reshape(4, 2)
box2 = text_comps[edge[1], :8].reshape(4, 2)
center1 = np.mean(box1, axis=0)
center2 = np.mean(box2, axis=0)
distance = norm(center1 - center2)
if distance > edge_len_thr:
scores[i] = 0
if (edge[0], edge[1]) in score_dict:
score_dict[edge[0], edge[1]] = 0.5 * (
score_dict[edge[0], edge[1]] + scores[i])
else:
score_dict[edge[0], edge[1]] = scores[i]
nodes = np.sort(np.unique(edges.flatten()))
    mapping = -1 * np.ones((np.max(nodes) + 1), dtype=int)
mapping[nodes] = np.arange(nodes.shape[0])
order_inds = mapping[edges]
vertices = [Node(node) for node in nodes]
for ind in order_inds:
vertices[ind[0]].add_link(vertices[ind[1]])
return vertices, score_dict
def connected_components(nodes, score_dict, link_thr):
assert isinstance(nodes, list)
assert all([isinstance(node, Node) for node in nodes])
assert isinstance(score_dict, dict)
assert isinstance(link_thr, float)
clusters = []
nodes = set(nodes)
while nodes:
node = nodes.pop()
cluster = {node}
node_queue = [node]
while node_queue:
node = node_queue.pop(0)
neighbors = set([
neighbor for neighbor in node.links
if score_dict[tuple(sorted([node.ind, neighbor.ind]))] >=
link_thr
])
neighbors.difference_update(cluster)
nodes.difference_update(neighbors)
cluster.update(neighbors)
node_queue.extend(neighbors)
clusters.append(list(cluster))
return clusters
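# Illustrative sketch (not part of the original file): edges whose averaged
# score stays below link_thr are ignored when growing clusters. With dummy
# 9-dim text components and two edges scored 0.9 and 0.1:
#   comps = np.zeros((3, 9), dtype=np.float32)
#   edges = np.array([[0, 1], [1, 2]])
#   scores = np.array([0.9, 0.1], dtype=np.float32)
#   vertices, score_dict = graph_propagation(edges, scores, comps)
#   connected_components(vertices, score_dict, link_thr=0.8)
#   # -> two clusters: nodes {0, 1} are linked, node 2 stays on its own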
def clusters2labels(clusters, num_nodes):
assert isinstance(clusters, list)
assert all([isinstance(cluster, list) for cluster in clusters])
assert all(
[isinstance(node, Node) for cluster in clusters for node in cluster])
assert isinstance(num_nodes, int)
node_labels = np.zeros(num_nodes)
for cluster_ind, cluster in enumerate(clusters):
for node in cluster:
node_labels[node.ind] = cluster_ind
return node_labels
def remove_single(text_comps, comp_pred_labels):
assert text_comps.ndim == 2
assert text_comps.shape[0] == comp_pred_labels.shape[0]
single_flags = np.zeros_like(comp_pred_labels)
pred_labels = np.unique(comp_pred_labels)
for label in pred_labels:
current_label_flag = (comp_pred_labels == label)
if np.sum(current_label_flag) == 1:
single_flags[np.where(current_label_flag)[0][0]] = 1
keep_ind = [i for i in range(len(comp_pred_labels)) if not single_flags[i]]
filtered_text_comps = text_comps[keep_ind, :]
filtered_labels = comp_pred_labels[keep_ind]
return filtered_text_comps, filtered_labels
def norm2(point1, point2):
return ((point1[0] - point2[0])**2 + (point1[1] - point2[1])**2)**0.5
def min_connect_path(points):
assert isinstance(points, list)
assert all([isinstance(point, list) for point in points])
assert all([isinstance(coord, int) for point in points for coord in point])
points_queue = points.copy()
shortest_path = []
current_edge = [[], []]
edge_dict0 = {}
edge_dict1 = {}
current_edge[0] = points_queue[0]
current_edge[1] = points_queue[0]
points_queue.remove(points_queue[0])
while points_queue:
for point in points_queue:
length0 = norm2(point, current_edge[0])
edge_dict0[length0] = [point, current_edge[0]]
length1 = norm2(current_edge[1], point)
edge_dict1[length1] = [current_edge[1], point]
key0 = min(edge_dict0.keys())
key1 = min(edge_dict1.keys())
if key0 <= key1:
start = edge_dict0[key0][0]
end = edge_dict0[key0][1]
shortest_path.insert(0, [points.index(start), points.index(end)])
points_queue.remove(start)
current_edge[0] = start
else:
start = edge_dict1[key1][0]
end = edge_dict1[key1][1]
shortest_path.append([points.index(start), points.index(end)])
points_queue.remove(end)
current_edge[1] = end
edge_dict0 = {}
edge_dict1 = {}
shortest_path = functools.reduce(operator.concat, shortest_path)
shortest_path = sorted(set(shortest_path), key=shortest_path.index)
return shortest_path
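# Illustrative sketch (not part of the original file): the component centers of
# one cluster are chained greedily by nearest-neighbour search from both ends
# of the current path, which recovers their spatial order:
#   min_connect_path([[0, 0], [10, 0], [5, 0]])
#   # -> [1, 2, 0], i.e. the chain (10, 0) -> (5, 0) -> (0, 0)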
def in_contour(cont, point):
x, y = point
is_inner = cv2.pointPolygonTest(cont, (int(x), int(y)), False) > 0.5
return is_inner
def fix_corner(top_line, bot_line, start_box, end_box):
assert isinstance(top_line, list)
assert all(isinstance(point, list) for point in top_line)
assert isinstance(bot_line, list)
assert all(isinstance(point, list) for point in bot_line)
assert start_box.shape == end_box.shape == (4, 2)
contour = np.array(top_line + bot_line[::-1])
start_left_mid = (start_box[0] + start_box[3]) / 2
start_right_mid = (start_box[1] + start_box[2]) / 2
end_left_mid = (end_box[0] + end_box[3]) / 2
end_right_mid = (end_box[1] + end_box[2]) / 2
if not in_contour(contour, start_left_mid):
top_line.insert(0, start_box[0].tolist())
bot_line.insert(0, start_box[3].tolist())
elif not in_contour(contour, start_right_mid):
top_line.insert(0, start_box[1].tolist())
bot_line.insert(0, start_box[2].tolist())
if not in_contour(contour, end_left_mid):
top_line.append(end_box[0].tolist())
bot_line.append(end_box[3].tolist())
elif not in_contour(contour, end_right_mid):
top_line.append(end_box[1].tolist())
bot_line.append(end_box[2].tolist())
return top_line, bot_line
def comps2boundaries(text_comps, comp_pred_labels):
assert text_comps.ndim == 2
assert len(text_comps) == len(comp_pred_labels)
boundaries = []
if len(text_comps) < 1:
return boundaries
for cluster_ind in range(0, int(np.max(comp_pred_labels)) + 1):
cluster_comp_inds = np.where(comp_pred_labels == cluster_ind)
text_comp_boxes = text_comps[cluster_comp_inds, :8].reshape(
(-1, 4, 2)).astype(np.int32)
score = np.mean(text_comps[cluster_comp_inds, -1])
if text_comp_boxes.shape[0] < 1:
continue
elif text_comp_boxes.shape[0] > 1:
centers = np.mean(text_comp_boxes, axis=1).astype(np.int32).tolist()
shortest_path = min_connect_path(centers)
text_comp_boxes = text_comp_boxes[shortest_path]
top_line = np.mean(
text_comp_boxes[:, 0:2, :], axis=1).astype(np.int32).tolist()
bot_line = np.mean(
text_comp_boxes[:, 2:4, :], axis=1).astype(np.int32).tolist()
top_line, bot_line = fix_corner(
top_line, bot_line, text_comp_boxes[0], text_comp_boxes[-1])
boundary_points = top_line + bot_line[::-1]
else:
            top_line = text_comp_boxes[0, 0:2, :].astype(np.int32).tolist()
            # reverse the bottom corners so the boundary is ordered tl, tr, br, bl
            bot_line = text_comp_boxes[0, 2:4, :][::-1].astype(np.int32).tolist()
            boundary_points = top_line + bot_line
boundary = [p for coord in boundary_points for p in coord] + [score]
boundaries.append(boundary)
return boundaries
class DRRGPostprocess(object):
"""Merge text components and construct boundaries of text instances.
Args:
link_thr (float): The edge score threshold.
"""
def __init__(self, link_thr, **kwargs):
assert isinstance(link_thr, float)
self.link_thr = link_thr
def __call__(self, preds, shape_list):
"""
Args:
edges (ndarray): The edge array of shape N * 2, each row is a node
index pair that makes up an edge in graph.
scores (ndarray): The edge score array of shape (N,).
text_comps (ndarray): The text components.
Returns:
List[list[float]]: The predicted boundaries of text instances.
"""
edges, scores, text_comps = preds
if edges is not None:
if isinstance(edges, paddle.Tensor):
edges = edges.numpy()
if isinstance(scores, paddle.Tensor):
scores = scores.numpy()
if isinstance(text_comps, paddle.Tensor):
text_comps = text_comps.numpy()
assert len(edges) == len(scores)
assert text_comps.ndim == 2
assert text_comps.shape[1] == 9
vertices, score_dict = graph_propagation(edges, scores, text_comps)
clusters = connected_components(vertices, score_dict, self.link_thr)
pred_labels = clusters2labels(clusters, text_comps.shape[0])
text_comps, pred_labels = remove_single(text_comps, pred_labels)
boundaries = comps2boundaries(text_comps, pred_labels)
else:
boundaries = []
boundaries, scores = self.resize_boundary(
boundaries, (1 / shape_list[0, 2:]).tolist()[::-1])
boxes_batch = [dict(points=boundaries, scores=scores)]
return boxes_batch
def resize_boundary(self, boundaries, scale_factor):
"""Rescale boundaries via scale_factor.
Args:
            boundaries (list[list[float]]): The boundary list. Each boundary
                has size 2k+1 with k >= 4 (2k coordinates plus a score).
            scale_factor (list[float]): The per-axis scale factors in
                (x, y) order.
        Returns:
            tuple: The rescaled boundary point arrays and their scores.
"""
boxes = []
scores = []
for b in boundaries:
sz = len(b)
scores.append(b[-1])
b = (np.array(b[:sz - 1]) *
(np.tile(scale_factor[:2], int(
(sz - 1) / 2)).reshape(1, sz - 1))).flatten().tolist()
boxes.append(np.array(b).reshape([-1, 2]))
return boxes, scores
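# Illustrative sketch (not part of the original file): the post-processor
# consumes the (edges, scores, text_comps) tuple produced by the DRRG head at
# inference time. If the head yields no components, edges comes through as None
# and an empty result is returned for the single-image batch:
#   pp = DRRGPostprocess(link_thr=0.8)
#   shape_list = np.array([[640., 640., 1., 1.]])  # src_h, src_w, ratio_h, ratio_w
#   pp((None, None, None), shape_list)
#   # -> [{'points': [], 'scores': []}]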
...@@ -15,3 +15,4 @@ premailer
openpyxl
attrdict
Polygon3
lanms-neo==1.0.2
...@@ -217,7 +217,7 @@ def train(config,
    use_srn = config['Architecture']['algorithm'] == "SRN"
    extra_input_models = [
        "SRN", "NRTR", "SAR", "SEED", "SVTR", "SPIN", "VisionLAN",
        "RobustScanner", 'DRRG'
    ]
    extra_input = False
    if config['Architecture']['algorithm'] == 'Distillation':
...@@ -625,7 +625,7 @@ def preprocess(is_train=False):
        'CLS', 'PGNet', 'Distillation', 'NRTR', 'TableAttn', 'SAR', 'PSE',
        'SEED', 'SDMGR', 'LayoutXLM', 'LayoutLM', 'LayoutLMv2', 'PREN', 'FCE',
        'SVTR', 'ViTSTR', 'ABINet', 'DB++', 'TableMaster', 'SPIN', 'VisionLAN',
        'Gestalt', 'SLANet', 'RobustScanner', 'CT', 'DRRG'
    ]
    if use_xpu:
...