diff --git a/configs/det/det_db_icdar15_reader.yml b/configs/det/det_db_icdar15_reader.yml
deleted file mode 100755
index 0f99257b53a366ccdb2521ca742198adfe3ff556..0000000000000000000000000000000000000000
--- a/configs/det/det_db_icdar15_reader.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-TrainReader:
- reader_function: ppocr.data.det.dataset_traversal,TrainReader
- process_function: ppocr.data.det.db_process,DBProcessTrain
- num_workers: 8
- img_set_dir: ./train_data/icdar2015/text_localization/
- label_file_path: ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
-
-EvalReader:
- reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
- process_function: ppocr.data.det.db_process,DBProcessTest
- img_set_dir: ./train_data/icdar2015/text_localization/
- label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
- test_image_shape: [736, 1280]
-
-TestReader:
- reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
- process_function: ppocr.data.det.db_process,DBProcessTest
- infer_img:
- img_set_dir: ./train_data/icdar2015/text_localization/
- label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
- test_image_shape: [736, 1280]
- do_eval: True
diff --git a/configs/det/det_east_icdar15_reader.yml b/configs/det/det_east_icdar15_reader.yml
deleted file mode 100755
index 060ed4dd380d0457574c1d20be3225c7fd188108..0000000000000000000000000000000000000000
--- a/configs/det/det_east_icdar15_reader.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-TrainReader:
- reader_function: ppocr.data.det.dataset_traversal,TrainReader
- process_function: ppocr.data.det.east_process,EASTProcessTrain
- num_workers: 8
- img_set_dir: ./train_data/icdar2015/text_localization/
- label_file_path: ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
- background_ratio: 0.125
- min_crop_side_ratio: 0.1
- min_text_size: 10
-
-EvalReader:
- reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
- process_function: ppocr.data.det.east_process,EASTProcessTest
- img_set_dir: ./train_data/icdar2015/text_localization/
- label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
-
-TestReader:
- reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
- process_function: ppocr.data.det.east_process,EASTProcessTest
- infer_img:
- img_set_dir: ./train_data/icdar2015/text_localization/
- label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
- do_eval: True
diff --git a/configs/det/det_mv3_db.yml b/configs/det/det_mv3_db.yml
old mode 100755
new mode 100644
index caa7bd4fa09752cff8b4d596e80b5729cce175bf..8af845dbdca1b3a8e72994c722fa922cf1f1d92b
--- a/configs/det/det_mv3_db.yml
+++ b/configs/det/det_mv3_db.yml
@@ -1,54 +1,133 @@
Global:
- algorithm: DB
use_gpu: true
epoch_num: 1200
log_smooth_window: 20
print_batch_step: 2
- save_model_dir: ./output/det_db/
- save_epoch_step: 200
+ save_model_dir: ./output/20201010/
+ save_epoch_step: 1200
# evaluation is run every 5000 iterations after the 4000th iteration
- eval_batch_step: [4000, 5000]
- train_batch_size_per_card: 16
- test_batch_size_per_card: 16
- image_shape: [3, 640, 640]
- reader_yml: ./configs/det/det_db_icdar15_reader.yml
- pretrain_weights: ./pretrain_models/MobileNetV3_large_x0_5_pretrained/
- checkpoints:
- save_res_path: ./output/det_db/predicts_db.txt
+ eval_batch_step: 8
+ # if pretrained_model is saved in static mode, load_static_weights must be set to True
+ load_static_weights: True
+ cal_metric_during_train: False
+ pretrained_model: /home/zhoujun20/pretrain_models/MobileNetV3_large_x0_5_pretrained
+ checkpoints: #./output/det_db_0.001_DiceLoss_256_pp_config_2.0b_4gpu/best_accuracy
save_inference_dir:
-
-Architecture:
- function: ppocr.modeling.architectures.det_model,DetModel
+ use_visualdl: True
+ infer_img: doc/imgs_en/img_10.jpg
+ save_res_path: ./output/det_db/predicts_db.txt
-Backbone:
- function: ppocr.modeling.backbones.det_mobilenet_v3,MobileNetV3
- scale: 0.5
- model_name: large
+Optimizer:
+ name: Adam
+ beta1: 0.9
+ beta2: 0.999
+ learning_rate:
+# name: Cosine
+ lr: 0.001
+# warmup_epoch: 0
+ regularizer:
+ name: 'L2'
+ factor: 0
-Head:
- function: ppocr.modeling.heads.det_db_head,DBHead
- model_name: large
- k: 50
- inner_channels: 96
- out_channels: 2
+Architecture:
+ type: det
+ algorithm: DB
+ Transform:
+ Backbone:
+ name: MobileNetV3
+ scale: 0.5
+ model_name: large
+ Neck:
+ name: FPN
+ out_channels: 256
+ Head:
+ name: DBHead
+ k: 50
Loss:
- function: ppocr.modeling.losses.det_db_loss,DBLoss
+ name: DBLoss
balance_loss: true
main_loss_type: DiceLoss
alpha: 5
beta: 10
ohem_ratio: 3
-Optimizer:
- function: ppocr.optimizer,AdamDecay
- base_lr: 0.001
- beta1: 0.9
- beta2: 0.999
-
PostProcess:
- function: ppocr.postprocess.db_postprocess,DBPostProcess
+ name: DBPostProcess
thresh: 0.3
- box_thresh: 0.7
+ box_thresh: 0.6
max_candidates: 1000
- unclip_ratio: 2.0
+ unclip_ratio: 1.5
+
+Metric:
+ name: DetMetric
+ main_indicator: hmean
+
+TRAIN:
+ dataset:
+ name: SimpleDataSet
+ data_dir: /home/zhoujun20/detection/
+ file_list:
+ - /home/zhoujun20/detection/train_icdar2015_label.txt # dataset1
+ ratio_list: [1.0]
+ transforms:
+ - DecodeImage: # load image
+ img_mode: BGR
+ channel_first: False
+ - DetLabelEncode: # Class handling label
+ - IaaAugment:
+ augmenter_args:
+ - { 'type': Fliplr, 'args': { 'p': 0.5 } }
+ - { 'type': Affine, 'args': { 'rotate': [ -10,10 ] } }
+ - { 'type': Resize,'args': { 'size': [ 0.5,3 ] } }
+ - EastRandomCropData:
+ size: [ 640,640 ]
+ max_tries: 50
+ keep_ratio: true
+ - MakeBorderMap:
+ shrink_ratio: 0.4
+ thresh_min: 0.3
+ thresh_max: 0.7
+ - MakeShrinkMap:
+ shrink_ratio: 0.4
+ min_text_size: 8
+ - NormalizeImage:
+ scale: 1./255.
+ mean: [ 0.485, 0.456, 0.406 ]
+ std: [ 0.229, 0.224, 0.225 ]
+ order: 'hwc'
+ - ToCHWImage:
+ - keepKeys:
+        keep_keys: ['image','threshold_map','threshold_mask','shrink_map','shrink_mask'] # the dataloader returns the batch as a list in this order
+ loader:
+ shuffle: True
+ drop_last: False
+ batch_size: 16
+ num_workers: 6
+
+EVAL:
+ dataset:
+ name: SimpleDataSet
+ data_dir: /home/zhoujun20/detection/
+ file_list:
+ - /home/zhoujun20/detection/test_icdar2015_label.txt
+ transforms:
+ - DecodeImage: # load image
+ img_mode: BGR
+ channel_first: False
+ - DetLabelEncode: # Class handling label
+ - DetResizeForTest:
+ image_shape: [736,1280]
+ - NormalizeImage:
+ scale: 1./255.
+ mean: [ 0.485, 0.456, 0.406 ]
+ std: [ 0.229, 0.224, 0.225 ]
+ order: 'hwc'
+ - ToCHWImage:
+ - keepKeys:
+ keep_keys: ['image','shape','polys','ignore_tags']
+ loader:
+ shuffle: False
+ drop_last: False
+ batch_size: 1 # must be 1
+ num_workers: 6
\ No newline at end of file
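
The post-processing block above tightens `box_thresh` (0.7 → 0.6) and `unclip_ratio` (2.0 → 1.5). As a rough sketch of what `unclip_ratio` controls — not the exact `DBPostProcess` code — DB-style post-processing expands each detected (shrunk) polygon by a pyclipper offset whose distance is `area * unclip_ratio / perimeter`:

```python
# Illustrative sketch only; assumes pyclipper and shapely are available.
import numpy as np
import pyclipper
from shapely.geometry import Polygon

def unclip(box, unclip_ratio=1.5):
    """Expand a shrunk text polygon; box is an (N, 2) point array."""
    poly = Polygon(box)
    distance = poly.area * unclip_ratio / poly.length
    offset = pyclipper.PyclipperOffset()
    offset.AddPath(box.round().astype(int).tolist(), pyclipper.JT_ROUND,
                   pyclipper.ET_CLOSEDPOLYGON)
    return np.array(offset.Execute(distance)[0])
```

A smaller `unclip_ratio` expands boxes less aggressively, so 1.5 yields tighter final boxes than the previous 2.0.
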
diff --git a/configs/det/det_mv3_east.yml b/configs/det/det_mv3_east.yml
deleted file mode 100755
index 67b82fffff8c47e5ee5866ad22f238ece3822776..0000000000000000000000000000000000000000
--- a/configs/det/det_mv3_east.yml
+++ /dev/null
@@ -1,45 +0,0 @@
-Global:
- algorithm: EAST
- use_gpu: true
- epoch_num: 100000
- log_smooth_window: 20
- print_batch_step: 5
- save_model_dir: ./output/det_east/
- save_epoch_step: 200
- eval_batch_step: [5000, 5000]
- train_batch_size_per_card: 16
- test_batch_size_per_card: 16
- image_shape: [3, 512, 512]
- reader_yml: ./configs/det/det_east_icdar15_reader.yml
- pretrain_weights: ./pretrain_models/MobileNetV3_large_x0_5_pretrained/
- checkpoints:
- save_res_path: ./output/det_east/predicts_east.txt
- save_inference_dir:
-
-Architecture:
- function: ppocr.modeling.architectures.det_model,DetModel
-
-Backbone:
- function: ppocr.modeling.backbones.det_mobilenet_v3,MobileNetV3
- scale: 0.5
- model_name: large
-
-Head:
- function: ppocr.modeling.heads.det_east_head,EASTHead
- model_name: small
-
-Loss:
- function: ppocr.modeling.losses.det_east_loss,EASTLoss
-
-Optimizer:
- function: ppocr.optimizer,AdamDecay
- base_lr: 0.001
- beta1: 0.9
- beta2: 0.999
-
-PostProcess:
- function: ppocr.postprocess.east_postprocess,EASTPostPocess
- score_thresh: 0.8
- cover_thresh: 0.1
- nms_thresh: 0.2
-
diff --git a/configs/det/det_r50_vd_db.yml b/configs/det/det_r50_vd_db.yml
old mode 100755
new mode 100644
index 9a3b77e7cebce99f669d0b1be89ee56c84f41034..13a251324d36324d9949e86cf584a2467dc17948
--- a/configs/det/det_r50_vd_db.yml
+++ b/configs/det/det_r50_vd_db.yml
@@ -1,53 +1,132 @@
Global:
- algorithm: DB
use_gpu: true
epoch_num: 1200
log_smooth_window: 20
print_batch_step: 2
- save_model_dir: ./output/det_db/
- save_epoch_step: 200
- eval_batch_step: [5000, 5000]
- train_batch_size_per_card: 8
- test_batch_size_per_card: 16
- image_shape: [3, 640, 640]
- reader_yml: ./configs/det/det_db_icdar15_reader.yml
- pretrain_weights: ./pretrain_models/ResNet50_vd_ssld_pretrained/
- save_res_path: ./output/det_db/predicts_db.txt
- checkpoints:
+ save_model_dir: ./output/20201010/
+ save_epoch_step: 1200
+ # evaluation is run every 8 iterations
+ eval_batch_step: 8
+ # if pretrained_model is saved in static mode, load_static_weights must be set to True
+ load_static_weights: True
+ cal_metric_during_train: False
+ pretrained_model: /home/zhoujun20/pretrain_models/ResNet50_vd_ssld_pretrained
+ checkpoints: #./output/det_db_0.001_DiceLoss_256_pp_config_2.0b_4gpu/best_accuracy
save_inference_dir:
+ use_visualdl: True
+ infer_img: doc/imgs_en/img_10.jpg
+ save_res_path: ./output/det_db/predicts_db.txt
-Architecture:
- function: ppocr.modeling.architectures.det_model,DetModel
-
-Backbone:
- function: ppocr.modeling.backbones.det_resnet_vd,ResNet
- layers: 50
+Optimizer:
+ name: Adam
+ beta1: 0.9
+ beta2: 0.999
+ learning_rate:
+# name: Cosine
+ lr: 0.001
+# warmup_epoch: 0
+ regularizer:
+ name: 'L2'
+ factor: 0
-Head:
- function: ppocr.modeling.heads.det_db_head,DBHead
- model_name: large
- k: 50
- inner_channels: 256
- out_channels: 2
+Architecture:
+ type: det
+ algorithm: DB
+ Transform:
+ Backbone:
+ name: ResNet
+ layers: 50
+ Neck:
+ name: FPN
+ out_channels: 256
+ Head:
+ name: DBHead
+ k: 50
Loss:
- function: ppocr.modeling.losses.det_db_loss,DBLoss
+ name: DBLoss
balance_loss: true
main_loss_type: DiceLoss
alpha: 5
beta: 10
ohem_ratio: 3
-Optimizer:
- function: ppocr.optimizer,AdamDecay
- base_lr: 0.001
- beta1: 0.9
- beta2: 0.999
-
PostProcess:
- function: ppocr.postprocess.db_postprocess,DBPostProcess
+ name: DBPostProcess
thresh: 0.3
- box_thresh: 0.7
+ box_thresh: 0.6
max_candidates: 1000
unclip_ratio: 1.5
-
+
+Metric:
+ name: DetMetric
+ main_indicator: hmean
+
+TRAIN:
+ dataset:
+ name: SimpleDataSet
+ data_dir: /home/zhoujun20/detection/
+ file_list:
+ - /home/zhoujun20/detection/train_icdar2015_label.txt # dataset1
+ ratio_list: [1.0]
+ transforms:
+ - DecodeImage: # load image
+ img_mode: BGR
+ channel_first: False
+ - DetLabelEncode: # Class handling label
+ - IaaAugment:
+ augmenter_args:
+ - { 'type': Fliplr, 'args': { 'p': 0.5 } }
+ - { 'type': Affine, 'args': { 'rotate': [ -10,10 ] } }
+ - { 'type': Resize,'args': { 'size': [ 0.5,3 ] } }
+ - EastRandomCropData:
+ size: [ 640,640 ]
+ max_tries: 50
+ keep_ratio: true
+ - MakeBorderMap:
+ shrink_ratio: 0.4
+ thresh_min: 0.3
+ thresh_max: 0.7
+ - MakeShrinkMap:
+ shrink_ratio: 0.4
+ min_text_size: 8
+ - NormalizeImage:
+ scale: 1./255.
+ mean: [ 0.485, 0.456, 0.406 ]
+ std: [ 0.229, 0.224, 0.225 ]
+ order: 'hwc'
+ - ToCHWImage:
+ - keepKeys:
+        keep_keys: ['image','threshold_map','threshold_mask','shrink_map','shrink_mask'] # the dataloader returns the batch as a list in this order
+ loader:
+ shuffle: True
+ drop_last: False
+ batch_size: 16
+ num_workers: 6
+
+EVAL:
+ dataset:
+ name: SimpleDataSet
+ data_dir: /home/zhoujun20/detection/
+ file_list:
+ - /home/zhoujun20/detection/test_icdar2015_label.txt
+ transforms:
+ - DecodeImage: # load image
+ img_mode: BGR
+ channel_first: False
+ - DetLabelEncode: # Class handling label
+ - DetResizeForTest:
+ image_shape: [736,1280]
+ - NormalizeImage:
+ scale: 1./255.
+ mean: [ 0.485, 0.456, 0.406 ]
+ std: [ 0.229, 0.224, 0.225 ]
+ order: 'hwc'
+ - ToCHWImage:
+ - keepKeys:
+ keep_keys: ['image','shape','polys','ignore_tags']
+ loader:
+ shuffle: False
+ drop_last: False
+ batch_size: 1 # must be 1
+ num_workers: 6
\ No newline at end of file
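
Both DB configs normalize with `scale: 1./255.`, ImageNet mean/std, and `order: 'hwc'`. A minimal sketch of what a transform with these parameters does (assuming the string scale is evaluated to a float; the real `NormalizeImage` op may differ in details):

```python
import numpy as np

def normalize_image(img, scale='1./255.',
                    mean=(0.485, 0.456, 0.406),
                    std=(0.229, 0.224, 0.225),
                    order='hwc'):
    """img: uint8 array, HWC layout for order 'hwc'."""
    scale = eval(scale) if isinstance(scale, str) else scale
    shape = (1, 1, 3) if order == 'hwc' else (3, 1, 1)
    mean = np.array(mean, dtype='float32').reshape(shape)
    std = np.array(std, dtype='float32').reshape(shape)
    return (img.astype('float32') * scale - mean) / std
```
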
diff --git a/configs/det/det_r50_vd_east.yml b/configs/det/det_r50_vd_east.yml
deleted file mode 100755
index 8d86819937c902e47dded38ae0238fb8254d8ff0..0000000000000000000000000000000000000000
--- a/configs/det/det_r50_vd_east.yml
+++ /dev/null
@@ -1,44 +0,0 @@
-Global:
- algorithm: EAST
- use_gpu: true
- epoch_num: 100000
- log_smooth_window: 20
- print_batch_step: 5
- save_model_dir: ./output/det_east/
- save_epoch_step: 200
- eval_batch_step: [5000, 5000]
- train_batch_size_per_card: 8
- test_batch_size_per_card: 16
- image_shape: [3, 512, 512]
- reader_yml: ./configs/det/det_east_icdar15_reader.yml
- pretrain_weights: ./pretrain_models/ResNet50_vd_ssld_pretrained/
- save_res_path: ./output/det_east/predicts_east.txt
- checkpoints:
- save_inference_dir:
-
-Architecture:
- function: ppocr.modeling.architectures.det_model,DetModel
-
-Backbone:
- function: ppocr.modeling.backbones.det_resnet_vd,ResNet
- layers: 50
-
-Head:
- function: ppocr.modeling.heads.det_east_head,EASTHead
- model_name: large
-
-Loss:
- function: ppocr.modeling.losses.det_east_loss,EASTLoss
-
-Optimizer:
- function: ppocr.optimizer,AdamDecay
- base_lr: 0.001
- beta1: 0.9
- beta2: 0.999
-
-PostProcess:
- function: ppocr.postprocess.east_postprocess,EASTPostPocess
- score_thresh: 0.8
- cover_thresh: 0.1
- nms_thresh: 0.2
-
diff --git a/configs/det/det_r50_vd_sast_icdar15.yml b/configs/det/det_r50_vd_sast_icdar15.yml
deleted file mode 100644
index f1ecd61dc8ccb14fde98c2fc55cb2c9e630b5c44..0000000000000000000000000000000000000000
--- a/configs/det/det_r50_vd_sast_icdar15.yml
+++ /dev/null
@@ -1,50 +0,0 @@
-Global:
- algorithm: SAST
- use_gpu: true
- epoch_num: 2000
- log_smooth_window: 20
- print_batch_step: 2
- save_model_dir: ./output/det_sast/
- save_epoch_step: 20
- eval_batch_step: 5000
- train_batch_size_per_card: 8
- test_batch_size_per_card: 8
- image_shape: [3, 512, 512]
- reader_yml: ./configs/det/det_sast_icdar15_reader.yml
- pretrain_weights: ./pretrain_models/ResNet50_vd_ssld_pretrained/
- save_res_path: ./output/det_sast/predicts_sast.txt
- checkpoints:
- save_inference_dir:
-
-Architecture:
- function: ppocr.modeling.architectures.det_model,DetModel
-
-Backbone:
- function: ppocr.modeling.backbones.det_resnet_vd_sast,ResNet
- layers: 50
-
-Head:
- function: ppocr.modeling.heads.det_sast_head,SASTHead
- model_name: large
- only_fpn_up: False
-# with_cab: False
- with_cab: True
-
-Loss:
- function: ppocr.modeling.losses.det_sast_loss,SASTLoss
-
-Optimizer:
- function: ppocr.optimizer,RMSProp
- base_lr: 0.001
- decay:
- function: piecewise_decay
- boundaries: [30000, 50000, 80000, 100000, 150000]
- decay_rate: 0.3
-
-PostProcess:
- function: ppocr.postprocess.sast_postprocess,SASTPostProcess
- score_thresh: 0.5
- sample_pts_num: 2
- nms_thresh: 0.2
- expand_scale: 1.0
- shrink_ratio_of_width: 0.3
\ No newline at end of file
diff --git a/configs/det/det_r50_vd_sast_totaltext.yml b/configs/det/det_r50_vd_sast_totaltext.yml
deleted file mode 100644
index ec42ce6d4bafd0c5d4360a255f35d07e83f90787..0000000000000000000000000000000000000000
--- a/configs/det/det_r50_vd_sast_totaltext.yml
+++ /dev/null
@@ -1,50 +0,0 @@
-Global:
- algorithm: SAST
- use_gpu: true
- epoch_num: 2000
- log_smooth_window: 20
- print_batch_step: 2
- save_model_dir: ./output/det_sast/
- save_epoch_step: 20
- eval_batch_step: 5000
- train_batch_size_per_card: 8
- test_batch_size_per_card: 1
- image_shape: [3, 512, 512]
- reader_yml: ./configs/det/det_sast_totaltext_reader.yml
- pretrain_weights: ./pretrain_models/ResNet50_vd_ssld_pretrained/
- save_res_path: ./output/det_sast/predicts_sast.txt
- checkpoints:
- save_inference_dir:
-
-Architecture:
- function: ppocr.modeling.architectures.det_model,DetModel
-
-Backbone:
- function: ppocr.modeling.backbones.det_resnet_vd_sast,ResNet
- layers: 50
-
-Head:
- function: ppocr.modeling.heads.det_sast_head,SASTHead
- model_name: large
- only_fpn_up: False
- # with_cab: False
- with_cab: True
-
-Loss:
- function: ppocr.modeling.losses.det_sast_loss,SASTLoss
-
-Optimizer:
- function: ppocr.optimizer,RMSProp
- base_lr: 0.001
- decay:
- function: piecewise_decay
- boundaries: [30000, 50000, 80000, 100000, 150000]
- decay_rate: 0.3
-
-PostProcess:
- function: ppocr.postprocess.sast_postprocess,SASTPostProcess
- score_thresh: 0.5
- sample_pts_num: 6
- nms_thresh: 0.2
- expand_scale: 1.2
- shrink_ratio_of_width: 0.2
\ No newline at end of file
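
The deleted SAST configs schedule RMSProp with `piecewise_decay`: the base LR of 0.001 is multiplied by `decay_rate` (0.3) after each boundary step. A small sketch of that schedule:

```python
def piecewise_lr(step, base_lr=0.001,
                 boundaries=(30000, 50000, 80000, 100000, 150000),
                 decay_rate=0.3):
    # lr = base_lr * decay_rate ** (number of boundaries already passed)
    passed = sum(1 for b in boundaries if step >= b)
    return base_lr * decay_rate ** passed

# e.g. at step 60000 two boundaries have passed:
assert abs(piecewise_lr(60000) - 0.001 * 0.3 ** 2) < 1e-12
```
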
diff --git a/configs/det/det_sast_icdar15_reader.yml b/configs/det/det_sast_icdar15_reader.yml
deleted file mode 100644
index ee45a85da7452e2069b0d7467b1ccfc44dd656b7..0000000000000000000000000000000000000000
--- a/configs/det/det_sast_icdar15_reader.yml
+++ /dev/null
@@ -1,24 +0,0 @@
-TrainReader:
- reader_function: ppocr.data.det.dataset_traversal,TrainReader
- process_function: ppocr.data.det.sast_process,SASTProcessTrain
- num_workers: 8
- img_set_dir: ./train_data/
- label_file_path: [./train_data/icdar2013/train_label_json.txt, ./train_data/icdar2015/train_label_json.txt, ./train_data/icdar17_mlt_latin/train_label_json.txt, ./train_data/coco_text_icdar_4pts/train_label_json.txt]
- data_ratio_list: [0.1, 0.45, 0.3, 0.15]
- min_crop_side_ratio: 0.3
- min_crop_size: 24
- min_text_size: 4
- max_text_size: 512
-
-EvalReader:
- reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
- process_function: ppocr.data.det.sast_process,SASTProcessTest
- img_set_dir: ./train_data/icdar2015/text_localization/
- label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
- max_side_len: 1536
-
-TestReader:
- reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
- process_function: ppocr.data.det.sast_process,SASTProcessTest
- infer_img: ./train_data/icdar2015/text_localization/ch4_test_images/img_11.jpg
- max_side_len: 1536
diff --git a/configs/det/det_sast_totaltext_reader.yml b/configs/det/det_sast_totaltext_reader.yml
deleted file mode 100644
index 92503d9f0e2b57f0d22b15591c5400185daf2afa..0000000000000000000000000000000000000000
--- a/configs/det/det_sast_totaltext_reader.yml
+++ /dev/null
@@ -1,24 +0,0 @@
-TrainReader:
- reader_function: ppocr.data.det.dataset_traversal,TrainReader
- process_function: ppocr.data.det.sast_process,SASTProcessTrain
- num_workers: 8
- img_set_dir: ./train_data/
- label_file_path: [./train_data/art_latin_icdar_14pt/train_no_tt_test/train_label_json.txt, ./train_data/total_text_icdar_14pt/train_label_json.txt]
- data_ratio_list: [0.5, 0.5]
- min_crop_side_ratio: 0.3
- min_crop_size: 24
- min_text_size: 4
- max_text_size: 512
-
-EvalReader:
- reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
- process_function: ppocr.data.det.sast_process,SASTProcessTest
- img_set_dir: ./train_data/
- label_file_path: ./train_data/total_text_icdar_14pt/test_label_json.txt
- max_side_len: 768
-
-TestReader:
- reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
- process_function: ppocr.data.det.sast_process,SASTProcessTest
- infer_img: ./train_data/afs/total_text/Images/Test/img623.jpg
- max_side_len: 768
diff --git a/configs/rec/rec_benchmark_reader.yml b/configs/rec/rec_benchmark_reader.yml
deleted file mode 100755
index 524f2f68bac92ff6ffe3ff3b34e461d2adc81e41..0000000000000000000000000000000000000000
--- a/configs/rec/rec_benchmark_reader.yml
+++ /dev/null
@@ -1,12 +0,0 @@
-TrainReader:
- reader_function: ppocr.data.rec.dataset_traversal,LMDBReader
- num_workers: 8
- lmdb_sets_dir: ./train_data/data_lmdb_release/training/
-
-EvalReader:
- reader_function: ppocr.data.rec.dataset_traversal,LMDBReader
- lmdb_sets_dir: ./train_data/data_lmdb_release/validation/
-
-TestReader:
- reader_function: ppocr.data.rec.dataset_traversal,LMDBReader
- lmdb_sets_dir: ./train_data/data_lmdb_release/evaluation/
diff --git a/configs/rec/rec_chinese_common_train.yml b/configs/rec/rec_chinese_common_train.yml
deleted file mode 100644
index 0d897459e0a631a4ac1fa10973f18e8640078c1b..0000000000000000000000000000000000000000
--- a/configs/rec/rec_chinese_common_train.yml
+++ /dev/null
@@ -1,45 +0,0 @@
-Global:
- algorithm: CRNN
- use_gpu: true
- epoch_num: 3000
- log_smooth_window: 20
- print_batch_step: 10
- save_model_dir: ./output/rec_CRNN
- save_epoch_step: 3
- eval_batch_step: 2000
- train_batch_size_per_card: 128
- test_batch_size_per_card: 128
- image_shape: [3, 32, 320]
- max_text_length: 25
- character_type: ch
- character_dict_path: ./ppocr/utils/ppocr_keys_v1.txt
- loss_type: ctc
- distort: false
- use_space_char: false
- reader_yml: ./configs/rec/rec_chinese_reader.yml
- pretrain_weights:
- checkpoints:
- save_inference_dir:
- infer_img:
-
-Architecture:
- function: ppocr.modeling.architectures.rec_model,RecModel
-
-Backbone:
- function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
- layers: 34
-
-Head:
- function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
- encoder_type: rnn
- SeqRNN:
- hidden_size: 256
-
-Loss:
- function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-
-Optimizer:
- function: ppocr.optimizer,AdamDecay
- base_lr: 0.0005
- beta1: 0.9
- beta2: 0.999
diff --git a/configs/rec/rec_chinese_lite_train.yml b/configs/rec/rec_chinese_lite_train.yml
deleted file mode 100755
index 95a39a3b4d349973356594e15a23f951e27dc7c5..0000000000000000000000000000000000000000
--- a/configs/rec/rec_chinese_lite_train.yml
+++ /dev/null
@@ -1,46 +0,0 @@
-Global:
- algorithm: CRNN
- use_gpu: true
- epoch_num: 3000
- log_smooth_window: 20
- print_batch_step: 10
- save_model_dir: ./output/rec_CRNN
- save_epoch_step: 3
- eval_batch_step: 2000
- train_batch_size_per_card: 256
- test_batch_size_per_card: 256
- image_shape: [3, 32, 320]
- max_text_length: 25
- character_type: ch
- character_dict_path: ./ppocr/utils/ppocr_keys_v1.txt
- loss_type: ctc
- distort: false
- use_space_char: false
- reader_yml: ./configs/rec/rec_chinese_reader.yml
- pretrain_weights:
- checkpoints:
- save_inference_dir:
- infer_img:
-
-Architecture:
- function: ppocr.modeling.architectures.rec_model,RecModel
-
-Backbone:
- function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
- scale: 0.5
- model_name: small
-
-Head:
- function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
- encoder_type: rnn
- SeqRNN:
- hidden_size: 48
-
-Loss:
- function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-
-Optimizer:
- function: ppocr.optimizer,AdamDecay
- base_lr: 0.0005
- beta1: 0.9
- beta2: 0.999
diff --git a/configs/rec/rec_chinese_reader.yml b/configs/rec/rec_chinese_reader.yml
deleted file mode 100755
index a44efd9911d4595cc519b660e868aa9a1e0f144b..0000000000000000000000000000000000000000
--- a/configs/rec/rec_chinese_reader.yml
+++ /dev/null
@@ -1,13 +0,0 @@
-TrainReader:
- reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
- num_workers: 8
- img_set_dir: ./train_data
- label_file_path: ./train_data/rec_gt_train.txt
-
-EvalReader:
- reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
- img_set_dir: ./train_data
- label_file_path: ./train_data/rec_gt_test.txt
-
-TestReader:
- reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
diff --git a/configs/rec/rec_icdar15_reader.yml b/configs/rec/rec_icdar15_reader.yml
deleted file mode 100755
index 322d5f25e0ef0fab167c0c39b38fa488a5546f1b..0000000000000000000000000000000000000000
--- a/configs/rec/rec_icdar15_reader.yml
+++ /dev/null
@@ -1,13 +0,0 @@
-TrainReader:
- reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
- num_workers: 8
- img_set_dir: ./train_data/ic15_data
- label_file_path: ./train_data/ic15_data/rec_gt_train.txt
-
-EvalReader:
- reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
- img_set_dir: ./train_data/ic15_data
- label_file_path: ./train_data/ic15_data/rec_gt_test.txt
-
-TestReader:
- reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
diff --git a/configs/rec/rec_icdar15_train.yml b/configs/rec/rec_icdar15_train.yml
deleted file mode 100755
index 98a38e7477f725c605c0cf017b6a7a4b469f7f3b..0000000000000000000000000000000000000000
--- a/configs/rec/rec_icdar15_train.yml
+++ /dev/null
@@ -1,49 +0,0 @@
-Global:
- algorithm: CRNN
- use_gpu: true
- epoch_num: 1000
- log_smooth_window: 20
- print_batch_step: 10
- save_model_dir: ./output/rec_CRNN
- save_epoch_step: 300
- eval_batch_step: 500
- train_batch_size_per_card: 256
- test_batch_size_per_card: 256
- image_shape: [3, 32, 100]
- max_text_length: 25
- character_type: en
- loss_type: ctc
- distort: true
- debug: false
- reader_yml: ./configs/rec/rec_icdar15_reader.yml
- pretrain_weights: ./pretrain_models/rec_mv3_none_bilstm_ctc/best_accuracy
- checkpoints:
- save_inference_dir:
- infer_img:
-
-Architecture:
- function: ppocr.modeling.architectures.rec_model,RecModel
-
-Backbone:
- function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
- scale: 0.5
- model_name: large
-
-Head:
- function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
- encoder_type: rnn
- SeqRNN:
- hidden_size: 96
-
-Loss:
- function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-
-Optimizer:
- function: ppocr.optimizer,AdamDecay
- base_lr: 0.0005
- beta1: 0.9
- beta2: 0.999
- decay:
- function: cosine_decay
- step_each_epoch: 20
- total_epoch: 1000
diff --git a/configs/rec/rec_mv3_none_bilstm_ctc.yml b/configs/rec/rec_mv3_none_bilstm_ctc.yml
old mode 100755
new mode 100644
index d2e096fb1c51588a6bd2c7ca8321cf817d435f23..3541a9d72155d3da669ae0479269066a673d6fb9
--- a/configs/rec/rec_mv3_none_bilstm_ctc.yml
+++ b/configs/rec/rec_mv3_none_bilstm_ctc.yml
@@ -1,43 +1,108 @@
Global:
- algorithm: CRNN
- use_gpu: true
- epoch_num: 72
+ use_gpu: false
+ epoch_num: 500
log_smooth_window: 20
print_batch_step: 10
- save_model_dir: output/rec_CRNN
- save_epoch_step: 3
- eval_batch_step: 2000
- train_batch_size_per_card: 256
- test_batch_size_per_card: 256
- image_shape: [3, 32, 100]
- max_text_length: 25
- character_type: en
- loss_type: ctc
- reader_yml: ./configs/rec/rec_benchmark_reader.yml
- pretrain_weights:
- checkpoints:
+ save_model_dir: ./output/rec/test/
+ save_epoch_step: 500
+ # evaluation is run every 127 iterations
+ eval_batch_step: 127
+ # if pretrained_model is saved in static mode, load_static_weights must be set to True
+ load_static_weights: True
+ cal_metric_during_train: True
+ pretrained_model:
+ checkpoints: #output/rec/rec_crnn/best_accuracy
save_inference_dir:
- infer_img:
-
-Architecture:
- function: ppocr.modeling.architectures.rec_model,RecModel
-
-Backbone:
- function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
- scale: 0.5
- model_name: large
-
-Head:
- function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
- encoder_type: rnn
- SeqRNN:
- hidden_size: 96
-
-Loss:
- function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
+ use_visualdl: False
+ infer_img: doc/imgs_words/ch/word_1.jpg
+ # for data or label process
+ max_text_length: 80
+ character_dict_path: ppocr/utils/ppocr_keys_v1.txt
+ character_type: 'ch'
+ use_space_char: False
+ infer_mode: False
+ use_tps: False
+
Optimizer:
- function: ppocr.optimizer,AdamDecay
- base_lr: 0.001
+ name: Adam
beta1: 0.9
beta2: 0.999
+ learning_rate:
+ name: Cosine
+ lr: 0.001
+ warmup_epoch: 4
+ regularizer:
+ name: 'L2'
+ factor: 0.00001
+
+Architecture:
+ type: rec
+ algorithm: CRNN
+ Transform:
+ Backbone:
+ name: MobileNetV3
+ scale: 0.5
+ model_name: small
+ small_stride: [ 1, 2, 2, 2 ]
+ Neck:
+ name: SequenceEncoder
+ encoder_type: fc
+ hidden_size: 96
+ Head:
+ name: CTC
+ fc_decay: 0.00001
+
+Loss:
+ name: CTCLoss
+
+PostProcess:
+ name: CTCLabelDecode
+
+Metric:
+ name: RecMetric
+ main_indicator: acc
+
+TRAIN:
+ dataset:
+ name: SimpleDataSet
+ data_dir: /home/zhoujun20/rec
+ file_list:
+ - /home/zhoujun20/rec/real_data.txt # dataset1
+      ratio_list: [ 0.4,0.6 ] # unused here: build_dataloader forces [1.0] for a single-entry file_list
+ transforms:
+ - DecodeImage: # load image
+ img_mode: BGR
+ channel_first: False
+ - CTCLabelEncode: # Class handling label
+ - RecAug:
+ - RecResizeImg:
+ image_shape: [ 3,32,320 ]
+ - keepKeys:
+          keep_keys: [ 'image','label','length' ] # the dataloader returns the batch as a list in this order
+ loader:
+ batch_size: 256
+ shuffle: True
+ drop_last: True
+ num_workers: 6
+
+EVAL:
+ dataset:
+ name: SimpleDataSet
+ data_dir: /home/zhoujun20/rec
+ file_list:
+ - /home/zhoujun20/rec/label_val_all.txt
+ transforms:
+ - DecodeImage: # load image
+ img_mode: BGR
+ channel_first: False
+ - CTCLabelEncode: # Class handling label
+ - RecResizeImg:
+ image_shape: [ 3,32,320 ]
+ - keepKeys:
+          keep_keys: [ 'image','label','length' ] # the dataloader returns the batch as a list in this order
+ loader:
+ shuffle: False
+ drop_last: False
+ batch_size: 256
+ num_workers: 6
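
The rewritten recognition config pairs `CTCLoss` with a `CTCLabelDecode` post-process. As a hedged sketch of greedy CTC decoding (assuming blank index 0 and a charset that excludes the blank — the real op may differ):

```python
import numpy as np

def ctc_greedy_decode(probs, charset, blank=0):
    """probs: (T, C) per-timestep class scores; returns the decoded string."""
    ids = probs.argmax(axis=1)
    out, prev = [], blank
    for i in ids:
        if i != blank and i != prev:   # collapse repeats, drop blanks
            out.append(charset[i - 1])  # charset excludes the blank (assumption)
        prev = i
    return ''.join(out)
```
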
diff --git a/configs/rec/rec_mv3_none_bilstm_ctc_lmdb.yml b/configs/rec/rec_mv3_none_bilstm_ctc_lmdb.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c26752bac37d864a8d917401286e3d11babcfb47
--- /dev/null
+++ b/configs/rec/rec_mv3_none_bilstm_ctc_lmdb.yml
@@ -0,0 +1,106 @@
+Global:
+ use_gpu: true
+ epoch_num: 500
+ log_smooth_window: 20
+ print_batch_step: 1
+ save_model_dir: ./output/rec/test/
+ save_epoch_step: 500
+ # evaluation is run every 1016 iterations
+ eval_batch_step: 1016
+ # if pretrained_model is saved in static mode, load_static_weights must be set to True
+ load_static_weights: True
+ cal_metric_during_train: True
+ pretrained_model:
+ checkpoints: #output/rec/rec_crnn/best_accuracy
+ save_inference_dir:
+ use_visualdl: True
+ infer_img: doc/imgs_words/ch/word_1.jpg
+ # for data or label process
+ max_text_length: 80
+ character_dict_path: /home/zhoujun20/rec/lmdb/dict.txt
+ character_type: 'ch'
+ use_space_char: True
+ infer_mode: False
+ use_tps: False
+
+
+Optimizer:
+ name: Adam
+ beta1: 0.9
+ beta2: 0.999
+ learning_rate:
+ name: Cosine
+ lr: 0.0005
+ warmup_epoch: 1
+ regularizer:
+ name: 'L2'
+ factor: 0.00001
+
+Architecture:
+ type: rec
+ algorithm: CRNN
+ Transform:
+ Backbone:
+ name: MobileNetV3
+ scale: 0.5
+ model_name: small
+ small_stride: [ 1, 2, 2, 2 ]
+ Neck:
+ name: SequenceEncoder
+ encoder_type: rnn
+ hidden_size: 48
+ Head:
+ name: CTC
+ fc_decay: 0.00001
+
+Loss:
+ name: CTCLoss
+
+PostProcess:
+ name: CTCLabelDecode
+
+Metric:
+ name: RecMetric
+ main_indicator: acc
+
+TRAIN:
+ dataset:
+ name: LMDBDateSet
+ file_list:
+ - /home/zhoujun20/rec/lmdb/train # dataset1
+      ratio_list: [ 0.4,0.6 ] # unused here: build_dataloader forces [1.0] for a single-entry file_list
+ transforms:
+ - DecodeImage: # load image
+ img_mode: BGR
+ channel_first: False
+ - CTCLabelEncode: # Class handling label
+ - RecAug:
+ - RecResizeImg:
+ image_shape: [ 3,32,320 ]
+ - keepKeys:
+          keep_keys: [ 'image','label','length' ] # the dataloader returns the batch as a list in this order
+ loader:
+ batch_size: 256
+ shuffle: True
+ drop_last: True
+ num_workers: 6
+
+EVAL:
+ dataset:
+ name: LMDBDateSet
+ file_list:
+ - /home/zhoujun20/rec/lmdb/val
+ transforms:
+ - DecodeImage: # load image
+ img_mode: BGR
+ channel_first: False
+ - CTCLabelEncode: # Class handling label
+ - RecResizeImg:
+ image_shape: [ 3,32,320 ]
+ - keepKeys:
+          keep_keys: [ 'image','label','length' ] # the dataloader returns the batch as a list in this order
+ loader:
+ shuffle: False
+ drop_last: False
+ batch_size: 256
+ num_workers: 6
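
The LMDB variant relies on the `num-samples` / `image-%09d` / `label-%09d` key scheme that the new `LMDBDateSet` in ppocr/data/dataset.py (below) reads. A minimal sketch of fetching one sample:

```python
import lmdb

def read_lmdb_sample(data_dir, index):
    """Fetch (encoded image bytes, label) at 1-based index from an LMDB set."""
    env = lmdb.open(data_dir, readonly=True, lock=False,
                    readahead=False, meminit=False)
    with env.begin(write=False) as txn:
        num_samples = int(txn.get('num-samples'.encode()))
        assert 1 <= index <= num_samples
        label = txn.get('label-%09d'.encode() % index).decode('utf-8')
        imgbuf = txn.get('image-%09d'.encode() % index)  # raw encoded bytes
    return imgbuf, label
```
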
diff --git a/configs/rec/rec_mv3_none_none_ctc.yml b/configs/rec/rec_mv3_none_none_ctc.yml
deleted file mode 100755
index ceec09ce6f3b6cb2238d6fb2e15f510cb31e0fd8..0000000000000000000000000000000000000000
--- a/configs/rec/rec_mv3_none_none_ctc.yml
+++ /dev/null
@@ -1,41 +0,0 @@
-Global:
- algorithm: Rosetta
- use_gpu: true
- epoch_num: 72
- log_smooth_window: 20
- print_batch_step: 10
- save_model_dir: output/rec_Rosetta
- save_epoch_step: 3
- eval_batch_step: 2000
- train_batch_size_per_card: 256
- test_batch_size_per_card: 256
- image_shape: [3, 32, 100]
- max_text_length: 25
- character_type: en
- loss_type: ctc
- reader_yml: ./configs/rec/rec_benchmark_reader.yml
- pretrain_weights:
- checkpoints:
- save_inference_dir:
- infer_img:
-
-Architecture:
- function: ppocr.modeling.architectures.rec_model,RecModel
-
-Backbone:
- function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
- scale: 0.5
- model_name: large
-
-Head:
- function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
- encoder_type: reshape
-
-Loss:
- function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-
-Optimizer:
- function: ppocr.optimizer,AdamDecay
- base_lr: 0.001
- beta1: 0.9
- beta2: 0.999
diff --git a/configs/rec/rec_mv3_tps_bilstm_attn.yml b/configs/rec/rec_mv3_tps_bilstm_attn.yml
deleted file mode 100755
index 7fc4f6799459bf9fbcd25e1609aeca5e3fd12a74..0000000000000000000000000000000000000000
--- a/configs/rec/rec_mv3_tps_bilstm_attn.yml
+++ /dev/null
@@ -1,54 +0,0 @@
-Global:
- algorithm: RARE
- use_gpu: true
- epoch_num: 72
- log_smooth_window: 20
- print_batch_step: 10
- save_model_dir: output/rec_RARE
- save_epoch_step: 3
- eval_batch_step: 2000
- train_batch_size_per_card: 256
- test_batch_size_per_card: 256
- image_shape: [3, 32, 100]
- max_text_length: 25
- character_type: en
- loss_type: attention
- tps: true
- reader_yml: ./configs/rec/rec_benchmark_reader.yml
- pretrain_weights:
- checkpoints:
- save_inference_dir:
- infer_img:
-
-
-Architecture:
- function: ppocr.modeling.architectures.rec_model,RecModel
-
-TPS:
- function: ppocr.modeling.stns.tps,TPS
- num_fiducial: 20
- loc_lr: 0.1
- model_name: small
-
-Backbone:
- function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
- scale: 0.5
- model_name: large
-
-Head:
- function: ppocr.modeling.heads.rec_attention_head,AttentionPredict
- encoder_type: rnn
- SeqRNN:
- hidden_size: 96
- Attention:
- decoder_size: 96
- word_vector_dim: 96
-
-Loss:
- function: ppocr.modeling.losses.rec_attention_loss,AttentionLoss
-
-Optimizer:
- function: ppocr.optimizer,AdamDecay
- base_lr: 0.001
- beta1: 0.9
- beta2: 0.999
diff --git a/configs/rec/rec_mv3_tps_bilstm_ctc.yml b/configs/rec/rec_mv3_tps_bilstm_ctc.yml
deleted file mode 100755
index 4b9660bcdec60989a6d9b9926c40814a83db6f39..0000000000000000000000000000000000000000
--- a/configs/rec/rec_mv3_tps_bilstm_ctc.yml
+++ /dev/null
@@ -1,51 +0,0 @@
-Global:
- algorithm: STARNet
- use_gpu: true
- epoch_num: 72
- log_smooth_window: 20
- print_batch_step: 10
- save_model_dir: output/rec_STARNet
- save_epoch_step: 3
- eval_batch_step: 2000
- train_batch_size_per_card: 256
- test_batch_size_per_card: 256
- image_shape: [3, 32, 100]
- max_text_length: 25
- character_type: en
- loss_type: ctc
- tps: true
- reader_yml: ./configs/rec/rec_benchmark_reader.yml
- pretrain_weights:
- checkpoints:
- save_inference_dir:
- infer_img:
-
-
-Architecture:
- function: ppocr.modeling.architectures.rec_model,RecModel
-
-TPS:
- function: ppocr.modeling.stns.tps,TPS
- num_fiducial: 20
- loc_lr: 0.1
- model_name: small
-
-Backbone:
- function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
- scale: 0.5
- model_name: large
-
-Head:
- function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
- encoder_type: rnn
- SeqRNN:
- hidden_size: 96
-
-Loss:
- function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-
-Optimizer:
- function: ppocr.optimizer,AdamDecay
- base_lr: 0.001
- beta1: 0.9
- beta2: 0.999
diff --git a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
old mode 100755
new mode 100644
index b71e8feae7ac8f235bf471101efd4383c61bfab2..3066cfa8b05c4c5a45a18a9cb95d2d9e4736e7b7
--- a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
+++ b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
@@ -1,43 +1,106 @@
Global:
- algorithm: CRNN
- use_gpu: true
- epoch_num: 72
+ use_gpu: false
+ epoch_num: 500
log_smooth_window: 20
print_batch_step: 10
- save_model_dir: output/rec_CRNN
- save_epoch_step: 3
- eval_batch_step: 2000
- train_batch_size_per_card: 256
- test_batch_size_per_card: 256
- image_shape: [3, 32, 100]
- max_text_length: 25
- character_type: en
- loss_type: ctc
- reader_yml: ./configs/rec/rec_benchmark_reader.yml
- pretrain_weights:
- checkpoints:
+ save_model_dir: ./output/rec/test/
+ save_epoch_step: 500
+ # evaluation is run every 127 iterations
+ eval_batch_step: 127
+ # if pretrained_model is saved in static mode, load_static_weights must be set to True
+ load_static_weights: True
+ cal_metric_during_train: True
+ pretrained_model:
+ checkpoints: #output/rec/rec_crnn/best_accuracy
save_inference_dir:
- infer_img:
-
+ use_visualdl: False
+ infer_img: doc/imgs_words/ch/word_1.jpg
+ # for data or label process
+ max_text_length: 80
+ character_dict_path: ppocr/utils/ppocr_keys_v1.txt
+ character_type: 'ch'
+ use_space_char: False
+ infer_mode: False
+ use_tps: False
-Architecture:
- function: ppocr.modeling.architectures.rec_model,RecModel
-
-Backbone:
- function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
- layers: 34
-
-Head:
- function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
- encoder_type: rnn
- SeqRNN:
- hidden_size: 256
-
-Loss:
- function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
Optimizer:
- function: ppocr.optimizer,AdamDecay
- base_lr: 0.001
+ name: Adam
beta1: 0.9
beta2: 0.999
+ learning_rate:
+ name: Cosine
+ lr: 0.001
+ warmup_epoch: 4
+ regularizer:
+ name: 'L2'
+ factor: 0.00001
+
+Architecture:
+ type: rec
+ algorithm: CRNN
+ Transform:
+ Backbone:
+ name: ResNet
+ layers: 200
+ Neck:
+ name: SequenceEncoder
+ encoder_type: fc
+ hidden_size: 96
+ Head:
+ name: CTC
+ fc_decay: 0.00001
+
+Loss:
+ name: CTCLoss
+
+PostProcess:
+ name: CTCLabelDecode
+
+Metric:
+ name: RecMetric
+ main_indicator: acc
+
+TRAIN:
+ dataset:
+ name: SimpleDataSet
+ data_dir: /home/zhoujun20/rec
+ file_list:
+ - /home/zhoujun20/rec/real_data.txt # dataset1
+      ratio_list: [ 0.4,0.6 ] # unused here: build_dataloader forces [1.0] for a single-entry file_list
+ transforms:
+ - DecodeImage: # load image
+ img_mode: BGR
+ channel_first: False
+ - CTCLabelEncode: # Class handling label
+ - RecAug:
+ - RecResizeImg:
+ image_shape: [ 3,32,320 ]
+ - keepKeys:
+          keep_keys: [ 'image','label','length' ] # the dataloader returns the batch as a list in this order
+ loader:
+ batch_size: 256
+ shuffle: True
+ drop_last: True
+ num_workers: 6
+
+EVAL:
+ dataset:
+ name: SimpleDataSet
+ data_dir: /home/zhoujun20/rec
+ file_list:
+ - /home/zhoujun20/rec/label_val_all.txt
+ transforms:
+ - DecodeImage: # load image
+ img_mode: BGR
+ channel_first: False
+ - CTCLabelEncode: # Class handling label
+ - RecResizeImg:
+ image_shape: [ 3,32,320 ]
+ - keepKeys:
+          keep_keys: [ 'image','label','length' ] # the dataloader returns the batch as a list in this order
+ loader:
+ shuffle: False
+ drop_last: False
+ batch_size: 256
+ num_workers: 6
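
Like the MobileNetV3 config, this one uses a `Cosine` learning-rate schedule with `warmup_epoch: 4`. One common warmup-plus-cosine formulation, sketched here under the assumption that warmup is linear (the exact PaddleOCR schedule may differ):

```python
import math

def warmup_cosine_lr(step, steps_per_epoch, base_lr=0.001,
                     warmup_epoch=4, total_epoch=500):
    warmup_steps = warmup_epoch * steps_per_epoch
    total_steps = total_epoch * steps_per_epoch
    if step < warmup_steps:
        return base_lr * step / max(1, warmup_steps)        # linear warmup
    progress = (step - warmup_steps) / max(1, total_steps - warmup_steps)
    return 0.5 * base_lr * (1.0 + math.cos(math.pi * progress))  # cosine decay
```
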
diff --git a/configs/rec/rec_r34_vd_none_none_ctc.yml b/configs/rec/rec_r34_vd_none_none_ctc.yml
deleted file mode 100755
index d9c9458d6d8fcdb9df590b0093d54b71e3e53fcc..0000000000000000000000000000000000000000
--- a/configs/rec/rec_r34_vd_none_none_ctc.yml
+++ /dev/null
@@ -1,40 +0,0 @@
-Global:
- algorithm: Rosetta
- use_gpu: true
- epoch_num: 72
- log_smooth_window: 20
- print_batch_step: 10
- save_model_dir: output/rec_Rosetta
- save_epoch_step: 3
- eval_batch_step: 2000
- train_batch_size_per_card: 256
- test_batch_size_per_card: 256
- image_shape: [3, 32, 100]
- max_text_length: 25
- character_type: en
- loss_type: ctc
- reader_yml: ./configs/rec/rec_benchmark_reader.yml
- pretrain_weights:
- checkpoints:
- save_inference_dir:
- infer_img:
-
-Architecture:
- function: ppocr.modeling.architectures.rec_model,RecModel
-
-Backbone:
- function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
- layers: 34
-
-Head:
- function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
- encoder_type: reshape
-
-Loss:
- function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-
-Optimizer:
- function: ppocr.optimizer,AdamDecay
- base_lr: 0.001
- beta1: 0.9
- beta2: 0.999
diff --git a/configs/rec/rec_r34_vd_tps_bilstm_attn.yml b/configs/rec/rec_r34_vd_tps_bilstm_attn.yml
deleted file mode 100755
index dfcd97fad67081a7ed04ad1d67ff298c9f553b0c..0000000000000000000000000000000000000000
--- a/configs/rec/rec_r34_vd_tps_bilstm_attn.yml
+++ /dev/null
@@ -1,53 +0,0 @@
-Global:
- algorithm: RARE
- use_gpu: true
- epoch_num: 72
- log_smooth_window: 20
- print_batch_step: 10
- save_model_dir: output/rec_RARE
- save_epoch_step: 3
- eval_batch_step: 2000
- train_batch_size_per_card: 256
- test_batch_size_per_card: 256
- image_shape: [3, 32, 100]
- max_text_length: 25
- character_type: en
- loss_type: attention
- tps: true
- reader_yml: ./configs/rec/rec_benchmark_reader.yml
- pretrain_weights:
- checkpoints:
- save_inference_dir:
- infer_img:
-
-
-Architecture:
- function: ppocr.modeling.architectures.rec_model,RecModel
-
-TPS:
- function: ppocr.modeling.stns.tps,TPS
- num_fiducial: 20
- loc_lr: 0.1
- model_name: large
-
-Backbone:
- function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
- layers: 34
-
-Head:
- function: ppocr.modeling.heads.rec_attention_head,AttentionPredict
- encoder_type: rnn
- SeqRNN:
- hidden_size: 256
- Attention:
- decoder_size: 128
- word_vector_dim: 128
-
-Loss:
- function: ppocr.modeling.losses.rec_attention_loss,AttentionLoss
-
-Optimizer:
- function: ppocr.optimizer,AdamDecay
- base_lr: 0.001
- beta1: 0.9
- beta2: 0.999
diff --git a/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml b/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
deleted file mode 100755
index 574a088cc024541e086bedc0bc0a52082e9e7eb2..0000000000000000000000000000000000000000
--- a/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
+++ /dev/null
@@ -1,50 +0,0 @@
-Global:
- algorithm: STARNet
- use_gpu: true
- epoch_num: 72
- log_smooth_window: 20
- print_batch_step: 10
- save_model_dir: output/rec_STARNet
- save_epoch_step: 3
- eval_batch_step: 2000
- train_batch_size_per_card: 256
- test_batch_size_per_card: 256
- image_shape: [3, 32, 100]
- max_text_length: 25
- character_type: en
- loss_type: ctc
- tps: true
- reader_yml: ./configs/rec/rec_benchmark_reader.yml
- pretrain_weights:
- checkpoints:
- save_inference_dir:
- infer_img:
-
-
-Architecture:
- function: ppocr.modeling.architectures.rec_model,RecModel
-
-TPS:
- function: ppocr.modeling.stns.tps,TPS
- num_fiducial: 20
- loc_lr: 0.1
- model_name: large
-
-Backbone:
- function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
- layers: 34
-
-Head:
- function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
- encoder_type: rnn
- SeqRNN:
- hidden_size: 256
-
-Loss:
- function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-
-Optimizer:
- function: ppocr.optimizer,AdamDecay
- base_lr: 0.001
- beta1: 0.9
- beta2: 0.999
diff --git a/configs/rec/rec_r50fpn_vd_none_srn.yml b/configs/rec/rec_r50fpn_vd_none_srn.yml
deleted file mode 100755
index 30709e479f8da56b6bd7fe9ebf817a27bff9cc38..0000000000000000000000000000000000000000
--- a/configs/rec/rec_r50fpn_vd_none_srn.yml
+++ /dev/null
@@ -1,49 +0,0 @@
-Global:
- algorithm: SRN
- use_gpu: true
- epoch_num: 72
- log_smooth_window: 20
- print_batch_step: 10
- save_model_dir: output/rec_pvam_withrotate
- save_epoch_step: 1
- eval_batch_step: 8000
- train_batch_size_per_card: 64
- test_batch_size_per_card: 1
- image_shape: [1, 64, 256]
- max_text_length: 25
- character_type: en
- loss_type: srn
- num_heads: 8
- average_window: 0.15
- max_average_window: 15625
- min_average_window: 10000
- reader_yml: ./configs/rec/rec_benchmark_reader.yml
- pretrain_weights:
- checkpoints:
- save_inference_dir:
- infer_img:
-
-Architecture:
- function: ppocr.modeling.architectures.rec_model,RecModel
-
-Backbone:
- function: ppocr.modeling.backbones.rec_resnet_fpn,ResNet
- layers: 50
-
-Head:
- function: ppocr.modeling.heads.rec_srn_all_head,SRNPredict
- encoder_type: rnn
- num_encoder_TUs: 2
- num_decoder_TUs: 4
- hidden_dims: 512
- SeqRNN:
- hidden_size: 256
-
-Loss:
- function: ppocr.modeling.losses.rec_srn_loss,SRNLoss
-
-Optimizer:
- function: ppocr.optimizer,AdamDecay
- base_lr: 0.0001
- beta1: 0.9
- beta2: 0.999
diff --git a/ppocr/data/__init__.py b/ppocr/data/__init__.py
old mode 100755
new mode 100644
index abf198b97e6e818e1fbe59006f98492640bcee54..2a3e277106bb9292d9cc324e634d46b2ce3c51d5
--- a/ppocr/data/__init__.py
+++ b/ppocr/data/__init__.py
@@ -11,3 +11,114 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import os
+import sys
+import numpy as np
+import paddle
+
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
+
+import copy
+from paddle.io import DataLoader, DistributedBatchSampler, BatchSampler
+import paddle.distributed as dist
+
+from ppocr.data.imaug import transform, create_operators
+
+__all__ = ['build_dataloader', 'transform', 'create_operators']
+
+
+def build_dataset(config, global_config):
+ from ppocr.data.dataset import SimpleDataSet, LMDBDateSet
+ support_dict = ['SimpleDataSet', 'LMDBDateSet']
+
+ module_name = config.pop('name')
+    assert module_name in support_dict, Exception(
+        'DataSet only supports {}'.format(support_dict))
+
+ dataset = eval(module_name)(config, global_config)
+ return dataset
+
+
+def build_dataloader(config, device, distributed=False, global_config=None):
+ from ppocr.data.dataset import BatchBalancedDataLoader
+
+ config = copy.deepcopy(config)
+ dataset_config = config['dataset']
+
+ _dataset_list = []
+ file_list = dataset_config.pop('file_list')
+ if len(file_list) == 1:
+ ratio_list = [1.0]
+ else:
+ ratio_list = dataset_config.pop('ratio_list')
+ for file in file_list:
+ dataset_config['file_list'] = file
+ _dataset = build_dataset(dataset_config, global_config)
+ _dataset_list.append(_dataset)
+ data_loader = BatchBalancedDataLoader(_dataset_list, ratio_list,
+ distributed, device, config['loader'])
+ return data_loader, _dataset.info_dict
+
+
+def test_loader():
+ import time
+ from tools.program import load_config, ArgsParser
+
+ FLAGS = ArgsParser().parse_args()
+ config = load_config(FLAGS.config)
+
+ place = paddle.CPUPlace()
+ paddle.disable_static(place)
+
+ data_loader, _ = build_dataloader(
+ config['TRAIN'], place, global_config=config['Global'])
+ start = time.time()
+ print(len(data_loader))
+ for epoch in range(1):
+ print('epoch {} ****************'.format(epoch))
+ for i, batch in enumerate(data_loader):
+ if i > len(data_loader):
+ break
+ t = time.time() - start
+ start = time.time()
+ print('{}, batch : {} ,time {}'.format(i, len(batch[0]), t))
+
+
+    # debug-only visualization of the last fetched batch; plt.show() is
+    # commented out below, so nothing is displayed by default
+    from matplotlib import pyplot as plt
+    import cv2
+ fig = plt.figure()
+ # # cv2.imwrite('img.jpg',batch[0].numpy()[0].transpose((1,2,0)))
+ # # cv2.imwrite('bmap.jpg',batch[1].numpy()[0])
+ # # cv2.imwrite('bmask.jpg',batch[2].numpy()[0])
+ # # cv2.imwrite('smap.jpg',batch[3].numpy()[0])
+ # # cv2.imwrite('smask.jpg',batch[4].numpy()[0])
+ plt.title('img')
+ plt.imshow(batch[0].numpy()[0].transpose((1, 2, 0)))
+ # plt.figure()
+ # plt.title('bmap')
+ # plt.imshow(batch[1].numpy()[0],cmap='Greys')
+ # plt.figure()
+ # plt.title('bmask')
+ # plt.imshow(batch[2].numpy()[0],cmap='Greys')
+ # plt.figure()
+ # plt.title('smap')
+ # plt.imshow(batch[3].numpy()[0],cmap='Greys')
+ # plt.figure()
+ # plt.title('smask')
+ # plt.imshow(batch[4].numpy()[0],cmap='Greys')
+ # plt.show()
+ # break
+
+
+if __name__ == '__main__':
+ test_loader()
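
A hedged usage sketch of the new `build_dataloader` (the config path is assumed from this diff; any of the new-style YAMLs works, and the `keepKeys` order determines the batch layout):

```python
import paddle
import yaml

from ppocr.data import build_dataloader

# config path assumed from this diff
with open('configs/det/det_mv3_db.yml') as f:
    config = yaml.safe_load(f)

place = paddle.CPUPlace()
train_loader, info_dict = build_dataloader(
    config['TRAIN'], place, distributed=False,
    global_config=config['Global'])

for batch in train_loader:
    # batch layout follows keepKeys: image, threshold_map, threshold_mask,
    # shrink_map, shrink_mask
    images = batch[0]
    break
```
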
diff --git a/ppocr/data/dataset.py b/ppocr/data/dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..6183267d028cac43cf534eb9999d1289efcc5b19
--- /dev/null
+++ b/ppocr/data/dataset.py
@@ -0,0 +1,300 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import copy
+import numpy as np
+import os
+import lmdb
+import random
+import signal
+import paddle
+from paddle.io import Dataset, DataLoader, DistributedBatchSampler, BatchSampler
+
+from .imaug import transform, create_operators
+from ppocr.utils.logging import get_logger
+
+
+def term_mp(sig_num, frame):
+ """ kill all child processes
+ """
+ pid = os.getpid()
+ pgid = os.getpgid(os.getpid())
+    print("main proc {} exiting, killing process group {}".format(pid, pgid))
+ os.killpg(pgid, signal.SIGKILL)
+
+
+signal.signal(signal.SIGINT, term_mp)
+signal.signal(signal.SIGTERM, term_mp)
+
+
+class ModeException(Exception):
+ """
+ ModeException
+ """
+
+ def __init__(self, message='', mode=''):
+ message += "\nOnly the following 3 modes are supported: " \
+ "train, valid, test. Given mode is {}".format(mode)
+ super(ModeException, self).__init__(message)
+
+
+class SampleNumException(Exception):
+ """
+ SampleNumException
+ """
+
+ def __init__(self, message='', sample_num=0, batch_size=1):
+        message += "\nError: The total number of samples ({}) " \
+                   "is smaller than the batch_size ({}), and drop_last " \
+                   "is turned on, so nothing will be fed into the program. " \
+                   "Terminating now. Please reset batch_size to a smaller " \
+                   "number or feed more data!".format(sample_num, batch_size)
+ super(SampleNumException, self).__init__(message)
+
+
+def get_file_list(file_list, data_dir, delimiter='\t'):
+ """
+    Read "img_path<delimiter>label" lines from one or more label files.
+
+    Args:
+        file_list (str or list): label file path(s)
+        data_dir (str): directory prepended to every image path
+        delimiter (str): separator between image path and label
+ """
+ if isinstance(file_list, str):
+ file_list = [file_list]
+ data_source_list = []
+ for file in file_list:
+ with open(file) as f:
+ full_lines = [line.strip() for line in f]
+ for line in full_lines:
+ try:
+ img_path, label = line.split(delimiter)
+            except ValueError:
+                logger = get_logger()
+                logger.warning('label error in {}'.format(line))
+                continue  # skip malformed lines instead of reusing stale values
+ img_path = os.path.join(data_dir, img_path)
+ data = {'img_path': img_path, 'label': label}
+ data_source_list.append(data)
+ return data_source_list
+
+
+class LMDBDateSet(Dataset):
+ def __init__(self, config, global_config):
+ super(LMDBDateSet, self).__init__()
+ self.data_list = self.load_lmdb_dataset(
+ config['file_list'], global_config['max_text_length'])
+ random.shuffle(self.data_list)
+
+ self.ops = create_operators(config['transforms'], global_config)
+
+ # for rec
+ character = ''
+ for op in self.ops:
+ if hasattr(op, 'character'):
+ character = getattr(op, 'character')
+
+ self.info_dict = {'character': character}
+
+ def load_lmdb_dataset(self, data_dir, max_text_length):
+ self.env = lmdb.open(
+ data_dir,
+ max_readers=32,
+ readonly=True,
+ lock=False,
+ readahead=False,
+ meminit=False)
+        if not self.env:
+            print('cannot open lmdb dataset from %s' % (data_dir))
+            exit(1)
+
+ filtered_index_list = []
+ with self.env.begin(write=False) as txn:
+ nSamples = int(txn.get('num-samples'.encode()))
+ self.nSamples = nSamples
+ for index in range(self.nSamples):
+ index += 1 # lmdb starts with 1
+ label_key = 'label-%09d'.encode() % index
+ label = txn.get(label_key).decode('utf-8')
+ if len(label) > max_text_length:
+                    # label longer than max_text_length; skip this sample
+                    continue
+
+ # By default, images containing characters which are not in opt.character are filtered.
+ # You can add [UNK] token to `opt.character` in utils.py instead of this filtering.
+ filtered_index_list.append(index)
+ return filtered_index_list
+
+ def print_lmdb_sets_info(self, lmdb_sets):
+ lmdb_info_strs = []
+ for dataset_idx in range(len(lmdb_sets)):
+ tmp_str = " %s:%d," % (lmdb_sets[dataset_idx]['dirpath'],
+ lmdb_sets[dataset_idx]['num_samples'])
+ lmdb_info_strs.append(tmp_str)
+ lmdb_info_strs = ''.join(lmdb_info_strs)
+ logger = get_logger()
+ logger.info("DataSummary:" + lmdb_info_strs)
+ return
+
+ def __getitem__(self, idx):
+ idx = self.data_list[idx]
+ with self.env.begin(write=False) as txn:
+ label_key = 'label-%09d'.encode() % idx
+ label = txn.get(label_key)
+ if label is not None:
+ label = label.decode('utf-8')
+ img_key = 'image-%09d'.encode() % idx
+ imgbuf = txn.get(img_key)
+ data = {'image': imgbuf, 'label': label}
+ outs = transform(data, self.ops)
+ else:
+ outs = None
+ if outs is None:
+ return self.__getitem__(np.random.randint(self.__len__()))
+ return outs
+
+ def __len__(self):
+ return len(self.data_list)
+
+
+class SimpleDataSet(Dataset):
+ def __init__(self, config, global_config):
+ super(SimpleDataSet, self).__init__()
+ delimiter = config.get('delimiter', '\t')
+ self.data_list = get_file_list(config['file_list'], config['data_dir'],
+ delimiter)
+ random.shuffle(self.data_list)
+
+ self.ops = create_operators(config['transforms'], global_config)
+
+ # for rec
+ character = ''
+ for op in self.ops:
+ if hasattr(op, 'character'):
+ character = getattr(op, 'character')
+
+ self.info_dict = {'character': character}
+
+ def __getitem__(self, idx):
+ data = copy.deepcopy(self.data_list[idx])
+ with open(data['img_path'], 'rb') as f:
+ img = f.read()
+ data['image'] = img
+ outs = transform(data, self.ops)
+ if outs is None:
+ return self.__getitem__(np.random.randint(self.__len__()))
+ return outs
+
+ def __len__(self):
+ return len(self.data_list)
+
+
+class BatchBalancedDataLoader(object):
+ def __init__(self,
+ dataset_list: list,
+ ratio_list: list,
+ distributed,
+ device,
+ loader_args: dict):
+ """
+        Combine the datasets in dataset_list according to the matching
+        ratios in ratio_list, so that each batch is sampled from the
+        datasets in those proportions.
+        :param dataset_list: list of datasets
+        :param ratio_list: list of sampling ratios (must sum to 1)
+        :param loader_args: dataloader configuration
+ """
+ assert sum(ratio_list) == 1 and len(dataset_list) == len(ratio_list)
+
+ self.dataset_len = 0
+ self.data_loader_list = []
+ self.dataloader_iter_list = []
+ all_batch_size = loader_args.pop('batch_size')
+ batch_size_list = list(
+ map(int, [max(1.0, all_batch_size * x) for x in ratio_list]))
+ remain_num = all_batch_size - sum(batch_size_list)
+ batch_size_list[np.argmax(ratio_list)] += remain_num
+
+ for _dataset, _batch_size in zip(dataset_list, batch_size_list):
+ if distributed:
+ batch_sampler_class = DistributedBatchSampler
+ else:
+ batch_sampler_class = BatchSampler
+ batch_sampler = batch_sampler_class(
+ dataset=_dataset,
+ batch_size=_batch_size,
+ shuffle=loader_args['shuffle'],
+ drop_last=loader_args['drop_last'], )
+ _data_loader = DataLoader(
+ dataset=_dataset,
+ batch_sampler=batch_sampler,
+ places=device,
+ num_workers=loader_args['num_workers'],
+ return_list=True, )
+ self.data_loader_list.append(_data_loader)
+ self.dataloader_iter_list.append(iter(_data_loader))
+ self.dataset_len += len(_dataset)
+
+ def __iter__(self):
+ return self
+
+ def __len__(self):
+ return min([len(x) for x in self.data_loader_list])
+
+ def __next__(self):
+ batch = []
+ for i, data_loader_iter in enumerate(self.dataloader_iter_list):
+ try:
+ _batch_i = next(data_loader_iter)
+ batch.append(_batch_i)
+ except StopIteration:
+ self.dataloader_iter_list[i] = iter(self.data_loader_list[i])
+ _batch_i = next(self.dataloader_iter_list[i])
+ batch.append(_batch_i)
+ except ValueError:
+ pass
+ if len(batch) > 0:
+ batch_list = []
+ batch_item_size = len(batch[0])
+ for i in range(batch_item_size):
+ cur_item_list = [batch_i[i] for batch_i in batch]
+ batch_list.append(paddle.concat(cur_item_list, axis=0))
+        else:
+            # no sub-loader produced a batch; stop instead of indexing an
+            # empty list
+            raise StopIteration
+ return batch_list
+
+
+def fill_batch(batch):
+ """
+    2020.09.08: The current paddle version only supports returning batch
+    elements of equal length, so pad the items whose lengths differ.
+    This method is currently only useful for text detection.
+ """
+ keys = list(range(len(batch[0])))
+ v_max_len_dict = {}
+ for k in keys:
+ v_max_len_dict[k] = max([len(item[k]) for item in batch])
+ for item in batch:
+ length = []
+ for k in keys:
+ v = item[k]
+ length.append(len(v))
+ assert isinstance(v, np.ndarray)
+ if len(v) == v_max_len_dict[k]:
+ continue
+            # v is an ndarray (asserted above), so v.shape[1:] gives the
+            # per-item shape even when v is empty
+            tmp_shape = [v_max_len_dict[k] - len(v)] + list(v.shape[1:])
+            tmp_array = np.zeros(tmp_shape, dtype=v.dtype)
+ new_array = np.concatenate([v, tmp_array])
+ item[k] = new_array
+ item.append(length)
+ return batch
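
`BatchBalancedDataLoader` splits the global batch size across datasets by `ratio_list`, then hands any rounding remainder to the dataset with the largest ratio. A worked check of that arithmetic for the `[0.4, 0.6]` ratios used in the rec configs:

```python
import numpy as np

ratio_list = [0.4, 0.6]
all_batch_size = 256
batch_size_list = [int(max(1.0, all_batch_size * x)) for x in ratio_list]
# int() truncates: [102, 153], one sample short of 256
batch_size_list[int(np.argmax(ratio_list))] += all_batch_size - sum(batch_size_list)
assert batch_size_list == [102, 154]
```
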
diff --git a/ppocr/data/det/data_augment.py b/ppocr/data/det/data_augment.py
deleted file mode 100644
index f46c14ae2c5a15cdace1072dcf217f2908202b49..0000000000000000000000000000000000000000
--- a/ppocr/data/det/data_augment.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# -*- coding:utf-8 -*-
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-
-import numpy as np
-import random
-import cv2
-import math
-
-import imgaug
-import imgaug.augmenters as iaa
-
-
-def AugmentData(data):
- img = data['image']
- shape = img.shape
-
- aug = iaa.Sequential(
- [iaa.Fliplr(0.5), iaa.Affine(rotate=(-10, 10)), iaa.Resize(
- (0.5, 3))]).to_deterministic()
-
- def may_augment_annotation(aug, data, shape):
- if aug is None:
- return data
-
- line_polys = []
- for poly in data['polys']:
- new_poly = may_augment_poly(aug, shape, poly)
- line_polys.append(new_poly)
- data['polys'] = np.array(line_polys)
- return data
-
- def may_augment_poly(aug, img_shape, poly):
- keypoints = [imgaug.Keypoint(p[0], p[1]) for p in poly]
- keypoints = aug.augment_keypoints(
- [imgaug.KeypointsOnImage(
- keypoints, shape=img_shape)])[0].keypoints
- poly = [(p.x, p.y) for p in keypoints]
- return poly
-
- img_aug = aug.augment_image(img)
- data['image'] = img_aug
- data = may_augment_annotation(aug, data, shape)
- return data
diff --git a/ppocr/data/det/dataset_traversal.py b/ppocr/data/det/dataset_traversal.py
deleted file mode 100644
index bd055c82be3bd5c762ccaf7a0134f89fbe4fe290..0000000000000000000000000000000000000000
--- a/ppocr/data/det/dataset_traversal.py
+++ /dev/null
@@ -1,167 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-import os
-import sys
-import math
-import random
-import functools
-import numpy as np
-import cv2
-import string
-from ppocr.utils.utility import initial_logger
-logger = initial_logger()
-from ppocr.utils.utility import create_module
-from ppocr.utils.utility import get_image_file_list
-import time
-
-
-class TrainReader(object):
- def __init__(self, params):
- self.num_workers = params['num_workers']
- self.label_file_path = params['label_file_path']
- print(self.label_file_path)
- self.use_mul_data = False
- if isinstance(self.label_file_path, list):
- self.use_mul_data = True
- self.data_ratio_list = params['data_ratio_list']
- self.batch_size = params['train_batch_size_per_card']
- assert 'process_function' in params,\
- "absence process_function in Reader"
- self.process = create_module(params['process_function'])(params)
-
- def __call__(self, process_id):
- def sample_iter_reader():
- with open(self.label_file_path, "rb") as fin:
- label_infor_list = fin.readlines()
- img_num = len(label_infor_list)
- img_id_list = list(range(img_num))
- random.shuffle(img_id_list)
- if sys.platform == "win32" and self.num_workers != 1:
- print("multiprocess is not fully compatible with Windows."
- "num_workers will be 1.")
- self.num_workers = 1
- for img_id in range(process_id, img_num, self.num_workers):
- label_infor = label_infor_list[img_id_list[img_id]]
- outs = self.process(label_infor)
- if outs is None:
- continue
- yield outs
-
- def sample_iter_reader_mul():
- batch_size = 1000
- data_source_list = self.label_file_path
- batch_size_list = list(map(int, [max(1.0, batch_size * x) for x in self.data_ratio_list]))
- print(self.data_ratio_list, batch_size_list)
-
- data_filename_list, data_size_list, fetch_record_list = [], [], []
- for data_source in data_source_list:
- image_files = open(data_source, "rb").readlines()
- random.shuffle(image_files)
- data_filename_list.append(image_files)
- data_size_list.append(len(image_files))
- fetch_record_list.append(0)
-
- image_batch = []
- # get a batch of img_fns and poly_fns
- for i in range(0, len(batch_size_list)):
- bs = batch_size_list[i]
- ds = data_size_list[i]
- image_names = data_filename_list[i]
- fetch_record = fetch_record_list[i]
- data_path = data_source_list[i]
- for j in range(fetch_record, fetch_record + bs):
- index = j % ds
- image_batch.append(image_names[index])
-
- if (fetch_record + bs) > ds:
- fetch_record_list[i] = 0
- random.shuffle(data_filename_list[i])
- else:
- fetch_record_list[i] = fetch_record + bs
-
- if sys.platform == "win32":
- print("multiprocess is not fully compatible with Windows."
- "num_workers will be 1.")
- self.num_workers = 1
-
- for label_infor in image_batch:
- outs = self.process(label_infor)
- if outs is None:
- continue
- yield outs
-
- def batch_iter_reader():
- batch_outs = []
- if self.use_mul_data:
- print("Sample date from multiple datasets!")
- for outs in sample_iter_reader_mul():
- batch_outs.append(outs)
- if len(batch_outs) == self.batch_size:
- yield batch_outs
- batch_outs = []
- else:
- for outs in sample_iter_reader():
- batch_outs.append(outs)
- if len(batch_outs) == self.batch_size:
- yield batch_outs
- batch_outs = []
-
- return batch_iter_reader
-
-
-class EvalTestReader(object):
- def __init__(self, params):
- self.params = params
- assert 'process_function' in params,\
- "absence process_function in EvalTestReader"
-
- def __call__(self, mode):
- process_function = create_module(self.params['process_function'])(
- self.params)
- batch_size = self.params['test_batch_size_per_card']
-
- img_list = []
- if mode != "test":
- img_set_dir = self.params['img_set_dir']
- img_name_list_path = self.params['label_file_path']
- with open(img_name_list_path, "rb") as fin:
- lines = fin.readlines()
- for line in lines:
- img_name = line.decode().strip("\n").split("\t")[0]
- img_path = os.path.join(img_set_dir, img_name)
- img_list.append(img_path)
- else:
- img_path = self.params['infer_img']
- img_list = get_image_file_list(img_path)
-
- def batch_iter_reader():
- batch_outs = []
- for img_path in img_list:
- img = cv2.imread(img_path)
- if img is None:
- logger.info("{} does not exist!".format(img_path))
- continue
- elif len(list(img.shape)) == 2 or img.shape[2] == 1:
- img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
- outs = process_function(img)
- outs.append(img_path)
- batch_outs.append(outs)
- if len(batch_outs) == batch_size:
- yield batch_outs
- batch_outs = []
- if len(batch_outs) != 0:
- yield batch_outs
-
- return batch_iter_reader
diff --git a/ppocr/data/det/db_process.py b/ppocr/data/det/db_process.py
deleted file mode 100644
index 9534c59ef69d830a8d991f421539c5e4e5bb3d39..0000000000000000000000000000000000000000
--- a/ppocr/data/det/db_process.py
+++ /dev/null
@@ -1,216 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-import math
-import cv2
-import numpy as np
-import json
-import sys
-from ppocr.utils.utility import initial_logger, check_and_read_gif
-logger = initial_logger()
-
-from .data_augment import AugmentData
-from .random_crop_data import RandomCropData
-from .make_shrink_map import MakeShrinkMap
-from .make_border_map import MakeBorderMap
-
-
-class DBProcessTrain(object):
- """
- DB pre-process for Train mode
- """
-
- def __init__(self, params):
- self.img_set_dir = params['img_set_dir']
- self.image_shape = params['image_shape']
-
- def order_points_clockwise(self, pts):
- rect = np.zeros((4, 2), dtype="float32")
- s = pts.sum(axis=1)
- rect[0] = pts[np.argmin(s)]
- rect[2] = pts[np.argmax(s)]
- diff = np.diff(pts, axis=1)
- rect[1] = pts[np.argmin(diff)]
- rect[3] = pts[np.argmax(diff)]
- return rect
-
- def make_data_dict(self, imgvalue, entry):
- boxes = []
- texts = []
- ignores = []
- for rect in entry:
- points = rect['points']
- transcription = rect['transcription']
- try:
- box = self.order_points_clockwise(
- np.array(points).reshape(-1, 2))
- if cv2.contourArea(box) > 0:
- boxes.append(box)
- texts.append(transcription)
- ignores.append(transcription in ['*', '###'])
- except:
- print('load label failed!')
- data = {
- 'image': imgvalue,
- 'shape': [imgvalue.shape[0], imgvalue.shape[1]],
- 'polys': np.array(boxes),
- 'texts': texts,
- 'ignore_tags': ignores,
- }
- return data
-
- def NormalizeImage(self, data):
- im = data['image']
- img_mean = [0.485, 0.456, 0.406]
- img_std = [0.229, 0.224, 0.225]
- im = im.astype(np.float32, copy=False)
- im = im / 255
- im -= img_mean
- im /= img_std
- channel_swap = (2, 0, 1)
- im = im.transpose(channel_swap)
- data['image'] = im
- return data
-
- def FilterKeys(self, data):
- filter_keys = ['polys', 'texts', 'ignore_tags', 'shape']
- for key in filter_keys:
- if key in data:
- del data[key]
- return data
-
- def convert_label_infor(self, label_infor):
- label_infor = label_infor.decode()
- label_infor = label_infor.encode('utf-8').decode('utf-8-sig')
- substr = label_infor.strip("\n").split("\t")
- img_path = self.img_set_dir + substr[0]
- label = json.loads(substr[1])
- return img_path, label
-
- def __call__(self, label_infor):
- img_path, gt_label = self.convert_label_infor(label_infor)
- imgvalue, flag = check_and_read_gif(img_path)
- if not flag:
- imgvalue = cv2.imread(img_path)
- if imgvalue is None:
- logger.info("{} does not exist!".format(img_path))
- return None
- if len(list(imgvalue.shape)) == 2 or imgvalue.shape[2] == 1:
- imgvalue = cv2.cvtColor(imgvalue, cv2.COLOR_GRAY2BGR)
- data = self.make_data_dict(imgvalue, gt_label)
- data = AugmentData(data)
- data = RandomCropData(data, self.image_shape[1:])
- data = MakeShrinkMap(data)
- data = MakeBorderMap(data)
- data = self.NormalizeImage(data)
- data = self.FilterKeys(data)
- return data['image'], data['shrink_map'], data['shrink_mask'], data[
- 'threshold_map'], data['threshold_mask']
-
-
-class DBProcessTest(object):
- """
- DB pre-process for Test mode
- """
-
- def __init__(self, params):
- super(DBProcessTest, self).__init__()
- self.resize_type = 0
- if 'test_image_shape' in params:
- self.image_shape = params['test_image_shape']
- # print(self.image_shape)
- self.resize_type = 1
- if 'max_side_len' in params:
- self.max_side_len = params['max_side_len']
- else:
- self.max_side_len = 2400
-
- def resize_image_type0(self, im):
- """
- resize the image to a size that is a multiple of 32, as required by the network
- args:
- img(array): array with shape [h, w, c]
- return(tuple):
- img, (ratio_h, ratio_w)
- """
- max_side_len = self.max_side_len
- h, w, _ = im.shape
-
- resize_w = w
- resize_h = h
-
- # limit the max side
- if max(resize_h, resize_w) > max_side_len:
- if resize_h > resize_w:
- ratio = float(max_side_len) / resize_h
- else:
- ratio = float(max_side_len) / resize_w
- else:
- ratio = 1.
- resize_h = int(resize_h * ratio)
- resize_w = int(resize_w * ratio)
- if resize_h % 32 == 0:
- resize_h = resize_h
- elif resize_h // 32 <= 1:
- resize_h = 32
- else:
- resize_h = (resize_h // 32 - 1) * 32
- if resize_w % 32 == 0:
- resize_w = resize_w
- elif resize_w // 32 <= 1:
- resize_w = 32
- else:
- resize_w = (resize_w // 32 - 1) * 32
- try:
- if int(resize_w) <= 0 or int(resize_h) <= 0:
- return None, (None, None)
- im = cv2.resize(im, (int(resize_w), int(resize_h)))
- except:
- print(im.shape, resize_w, resize_h)
- sys.exit(0)
- ratio_h = resize_h / float(h)
- ratio_w = resize_w / float(w)
- return im, (ratio_h, ratio_w)
-
- def resize_image_type1(self, im):
- resize_h, resize_w = self.image_shape
- ori_h, ori_w = im.shape[:2] # (h, w, c)
- im = cv2.resize(im, (int(resize_w), int(resize_h)))
- ratio_h = float(resize_h) / ori_h
- ratio_w = float(resize_w) / ori_w
- return im, (ratio_h, ratio_w)
-
- def normalize(self, im):
- img_mean = [0.485, 0.456, 0.406]
- img_std = [0.229, 0.224, 0.225]
- im = im.astype(np.float32, copy=False)
- im = im / 255
- im[:, :, 0] -= img_mean[0]
- im[:, :, 1] -= img_mean[1]
- im[:, :, 2] -= img_mean[2]
- im[:, :, 0] /= img_std[0]
- im[:, :, 1] /= img_std[1]
- im[:, :, 2] /= img_std[2]
- channel_swap = (2, 0, 1)
- im = im.transpose(channel_swap)
- return im
-
- def __call__(self, im):
- if self.resize_type == 0:
- im, (ratio_h, ratio_w) = self.resize_image_type0(im)
- else:
- im, (ratio_h, ratio_w) = self.resize_image_type1(im)
- im = self.normalize(im)
- im = im[np.newaxis, :]
- return [im, (ratio_h, ratio_w)]
diff --git a/ppocr/data/det/east_process.py b/ppocr/data/det/east_process.py
deleted file mode 100755
index e2581caa20bb0e63f67e110d483d3335f51d56b7..0000000000000000000000000000000000000000
--- a/ppocr/data/det/east_process.py
+++ /dev/null
@@ -1,537 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-import math
-import cv2
-import numpy as np
-import json
-import sys
-import os
-
-class EASTProcessTrain(object):
- def __init__(self, params):
- self.img_set_dir = params['img_set_dir']
- self.random_scale = np.array([0.5, 1, 2.0, 3.0])
- self.background_ratio = params['background_ratio']
- self.min_crop_side_ratio = params['min_crop_side_ratio']
- image_shape = params['image_shape']
- self.input_size = image_shape[1]
- self.min_text_size = params['min_text_size']
-
- def preprocess(self, im):
- input_size = self.input_size
- im_shape = im.shape
- im_size_min = np.min(im_shape[0:2])
- im_size_max = np.max(im_shape[0:2])
- im_scale = float(input_size) / float(im_size_max)
- im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale)
- img_mean = [0.485, 0.456, 0.406]
- img_std = [0.229, 0.224, 0.225]
- im = im[:, :, ::-1].astype(np.float32)
- im = im / 255
- im -= img_mean
- im /= img_std
- new_h, new_w, _ = im.shape
- im_padded = np.zeros((input_size, input_size, 3), dtype=np.float32)
- im_padded[:new_h, :new_w, :] = im
- im_padded = im_padded.transpose((2, 0, 1))
- im_padded = im_padded[np.newaxis, :]
- return im_padded, im_scale
-
- def convert_label_infor(self, label_infor):
- label_infor = label_infor.decode()
- label_infor = label_infor.encode('utf-8').decode('utf-8-sig')
- substr = label_infor.strip("\n").split("\t")
- img_path = os.path.join(self.img_set_dir, substr[0])
- label = json.loads(substr[1])
- nBox = len(label)
- wordBBs, txts, txt_tags = [], [], []
- for bno in range(0, nBox):
- wordBB = label[bno]['points']
- txt = label[bno]['transcription']
- wordBBs.append(wordBB)
- txts.append(txt)
- if txt == '###':
- txt_tags.append(True)
- else:
- txt_tags.append(False)
- wordBBs = np.array(wordBBs, dtype=np.float32)
- txt_tags = np.array(txt_tags, dtype=np.bool)
- return img_path, wordBBs, txt_tags, txts
-
- def rotate_im_poly(self, im, text_polys):
- """
- rotate image by 90 / 180 / 270 degrees
- """
- im_w, im_h = im.shape[1], im.shape[0]
- dst_im = im.copy()
- dst_polys = []
- rand_degree_ratio = np.random.rand()
- rand_degree_cnt = 1
- if 0.333 < rand_degree_ratio < 0.666:
- rand_degree_cnt = 2
- elif rand_degree_ratio > 0.666:
- rand_degree_cnt = 3
- for i in range(rand_degree_cnt):
- dst_im = np.rot90(dst_im)
- rot_degree = -90 * rand_degree_cnt
- rot_angle = rot_degree * math.pi / 180.0
- n_poly = text_polys.shape[0]
- cx, cy = 0.5 * im_w, 0.5 * im_h
- ncx, ncy = 0.5 * dst_im.shape[1], 0.5 * dst_im.shape[0]
- for i in range(n_poly):
- wordBB = text_polys[i]
- poly = []
- for j in range(4):
- sx, sy = wordBB[j][0], wordBB[j][1]
- dx = math.cos(rot_angle) * (sx - cx)\
- - math.sin(rot_angle) * (sy - cy) + ncx
- dy = math.sin(rot_angle) * (sx - cx)\
- + math.cos(rot_angle) * (sy - cy) + ncy
- poly.append([dx, dy])
- dst_polys.append(poly)
- dst_polys = np.array(dst_polys, dtype=np.float32)
- return dst_im, dst_polys
-
- def polygon_area(self, poly):
- """
- compute area of a polygon
- :param poly:
- :return:
- """
- edge = [(poly[1][0] - poly[0][0]) * (poly[1][1] + poly[0][1]),
- (poly[2][0] - poly[1][0]) * (poly[2][1] + poly[1][1]),
- (poly[3][0] - poly[2][0]) * (poly[3][1] + poly[2][1]),
- (poly[0][0] - poly[3][0]) * (poly[0][1] + poly[3][1])]
- return np.sum(edge) / 2.
-
- def check_and_validate_polys(self, polys, tags, img_height, img_width):
- """
- check that the text polys are in the same direction,
- and filter out invalid polygons
- :param polys:
- :param tags:
- :return:
- """
- h, w = img_height, img_width
- if polys.shape[0] == 0:
- return polys
- polys[:, :, 0] = np.clip(polys[:, :, 0], 0, w - 1)
- polys[:, :, 1] = np.clip(polys[:, :, 1], 0, h - 1)
-
- validated_polys = []
- validated_tags = []
- for poly, tag in zip(polys, tags):
- p_area = self.polygon_area(poly)
- #invalid poly
- if abs(p_area) < 1:
- continue
- if p_area > 0:
- #'poly in wrong direction'
- if not tag:
- tag = True #reversed cases should be ignored
- poly = poly[(0, 3, 2, 1), :]
- validated_polys.append(poly)
- validated_tags.append(tag)
- return np.array(validated_polys), np.array(validated_tags)
-
- def draw_img_polys(self, img, polys):
- if len(img.shape) == 4:
- img = np.squeeze(img, axis=0)
- if img.shape[0] == 3:
- img = img.transpose((1, 2, 0))
- img[:, :, 2] += 123.68
- img[:, :, 1] += 116.78
- img[:, :, 0] += 103.94
- cv2.imwrite("tmp.jpg", img)
- img = cv2.imread("tmp.jpg")
- for box in polys:
- box = box.astype(np.int32).reshape((-1, 1, 2))
- cv2.polylines(img, [box], True, color=(255, 255, 0), thickness=2)
- import random
- ino = random.randint(0, 100)
- cv2.imwrite("tmp_%d.jpg" % ino, img)
- return
-
- def shrink_poly(self, poly, r):
- """
- fit a poly inside the original poly, maybe bugs here...
- used to generate the score map
- :param poly: the text poly
- :param r: r in the paper
- :return: the shrinked poly
- """
- # shrink ratio
- R = 0.3
- # find the longer pair
- dist0 = np.linalg.norm(poly[0] - poly[1])
- dist1 = np.linalg.norm(poly[2] - poly[3])
- dist2 = np.linalg.norm(poly[0] - poly[3])
- dist3 = np.linalg.norm(poly[1] - poly[2])
- if dist0 + dist1 > dist2 + dist3:
- # first move (p0, p1), (p2, p3), then (p0, p3), (p1, p2)
- ## p0, p1
- theta = np.arctan2((poly[1][1] - poly[0][1]),
- (poly[1][0] - poly[0][0]))
- poly[0][0] += R * r[0] * np.cos(theta)
- poly[0][1] += R * r[0] * np.sin(theta)
- poly[1][0] -= R * r[1] * np.cos(theta)
- poly[1][1] -= R * r[1] * np.sin(theta)
- ## p2, p3
- theta = np.arctan2((poly[2][1] - poly[3][1]),
- (poly[2][0] - poly[3][0]))
- poly[3][0] += R * r[3] * np.cos(theta)
- poly[3][1] += R * r[3] * np.sin(theta)
- poly[2][0] -= R * r[2] * np.cos(theta)
- poly[2][1] -= R * r[2] * np.sin(theta)
- ## p0, p3
- theta = np.arctan2((poly[3][0] - poly[0][0]),
- (poly[3][1] - poly[0][1]))
- poly[0][0] += R * r[0] * np.sin(theta)
- poly[0][1] += R * r[0] * np.cos(theta)
- poly[3][0] -= R * r[3] * np.sin(theta)
- poly[3][1] -= R * r[3] * np.cos(theta)
- ## p1, p2
- theta = np.arctan2((poly[2][0] - poly[1][0]),
- (poly[2][1] - poly[1][1]))
- poly[1][0] += R * r[1] * np.sin(theta)
- poly[1][1] += R * r[1] * np.cos(theta)
- poly[2][0] -= R * r[2] * np.sin(theta)
- poly[2][1] -= R * r[2] * np.cos(theta)
- else:
- ## p0, p3
- # print poly
- theta = np.arctan2((poly[3][0] - poly[0][0]),
- (poly[3][1] - poly[0][1]))
- poly[0][0] += R * r[0] * np.sin(theta)
- poly[0][1] += R * r[0] * np.cos(theta)
- poly[3][0] -= R * r[3] * np.sin(theta)
- poly[3][1] -= R * r[3] * np.cos(theta)
- ## p1, p2
- theta = np.arctan2((poly[2][0] - poly[1][0]),
- (poly[2][1] - poly[1][1]))
- poly[1][0] += R * r[1] * np.sin(theta)
- poly[1][1] += R * r[1] * np.cos(theta)
- poly[2][0] -= R * r[2] * np.sin(theta)
- poly[2][1] -= R * r[2] * np.cos(theta)
- ## p0, p1
- theta = np.arctan2((poly[1][1] - poly[0][1]),
- (poly[1][0] - poly[0][0]))
- poly[0][0] += R * r[0] * np.cos(theta)
- poly[0][1] += R * r[0] * np.sin(theta)
- poly[1][0] -= R * r[1] * np.cos(theta)
- poly[1][1] -= R * r[1] * np.sin(theta)
- ## p2, p3
- theta = np.arctan2((poly[2][1] - poly[3][1]),
- (poly[2][0] - poly[3][0]))
- poly[3][0] += R * r[3] * np.cos(theta)
- poly[3][1] += R * r[3] * np.sin(theta)
- poly[2][0] -= R * r[2] * np.cos(theta)
- poly[2][1] -= R * r[2] * np.sin(theta)
- return poly
-
- def generate_quad(self, im_size, polys, tags):
- """
- Generate quadrangle.
- """
- h, w = im_size
- poly_mask = np.zeros((h, w), dtype=np.uint8)
- score_map = np.zeros((h, w), dtype=np.uint8)
- # (x1, y1, ..., x4, y4, short_edge_norm)
- geo_map = np.zeros((h, w, 9), dtype=np.float32)
- # mask used during training, to ignore some hard areas
- training_mask = np.ones((h, w), dtype=np.uint8)
- for poly_idx, poly_tag in enumerate(zip(polys, tags)):
- poly = poly_tag[0]
- tag = poly_tag[1]
-
- r = [None, None, None, None]
- for i in range(4):
- dist1 = np.linalg.norm(poly[i] - poly[(i + 1) % 4])
- dist2 = np.linalg.norm(poly[i] - poly[(i - 1) % 4])
- r[i] = min(dist1, dist2)
- # score map
- shrinked_poly = self.shrink_poly(
- poly.copy(), r).astype(np.int32)[np.newaxis, :, :]
- cv2.fillPoly(score_map, shrinked_poly, 1)
- cv2.fillPoly(poly_mask, shrinked_poly, poly_idx + 1)
- # if the poly is too small, then ignore it during training
- poly_h = min(
- np.linalg.norm(poly[0] - poly[3]),
- np.linalg.norm(poly[1] - poly[2]))
- poly_w = min(
- np.linalg.norm(poly[0] - poly[1]),
- np.linalg.norm(poly[2] - poly[3]))
- if min(poly_h, poly_w) < self.min_text_size:
- cv2.fillPoly(training_mask,
- poly.astype(np.int32)[np.newaxis, :, :], 0)
-
- if tag:
- cv2.fillPoly(training_mask,
- poly.astype(np.int32)[np.newaxis, :, :], 0)
-
- xy_in_poly = np.argwhere(poly_mask == (poly_idx + 1))
- # geo map.
- y_in_poly = xy_in_poly[:, 0]
- x_in_poly = xy_in_poly[:, 1]
- poly[:, 0] = np.minimum(np.maximum(poly[:, 0], 0), w)
- poly[:, 1] = np.minimum(np.maximum(poly[:, 1], 0), h)
- for pno in range(4):
- geo_channel_beg = pno * 2
- geo_map[y_in_poly, x_in_poly, geo_channel_beg] =\
- x_in_poly - poly[pno, 0]
- geo_map[y_in_poly, x_in_poly, geo_channel_beg+1] =\
- y_in_poly - poly[pno, 1]
- geo_map[y_in_poly, x_in_poly, 8] = \
- 1.0 / max(min(poly_h, poly_w), 1.0)
- return score_map, geo_map, training_mask
-
- def crop_area(self,
- im,
- polys,
- tags,
- txts,
- crop_background=False,
- max_tries=50):
- """
- make random crop from the input image
- :param im:
- :param polys:
- :param tags:
- :param crop_background:
- :param max_tries:
- :return:
- """
- h, w, _ = im.shape
- pad_h = h // 10
- pad_w = w // 10
- h_array = np.zeros((h + pad_h * 2), dtype=np.int32)
- w_array = np.zeros((w + pad_w * 2), dtype=np.int32)
- for poly in polys:
- poly = np.round(poly, decimals=0).astype(np.int32)
- minx = np.min(poly[:, 0])
- maxx = np.max(poly[:, 0])
- w_array[minx + pad_w:maxx + pad_w] = 1
- miny = np.min(poly[:, 1])
- maxy = np.max(poly[:, 1])
- h_array[miny + pad_h:maxy + pad_h] = 1
- # ensure the cropped region does not cross any text
- h_axis = np.where(h_array == 0)[0]
- w_axis = np.where(w_array == 0)[0]
- if len(h_axis) == 0 or len(w_axis) == 0:
- return im, polys, tags, txts
-
- for i in range(max_tries):
- xx = np.random.choice(w_axis, size=2)
- xmin = np.min(xx) - pad_w
- xmax = np.max(xx) - pad_w
- xmin = np.clip(xmin, 0, w - 1)
- xmax = np.clip(xmax, 0, w - 1)
- yy = np.random.choice(h_axis, size=2)
- ymin = np.min(yy) - pad_h
- ymax = np.max(yy) - pad_h
- ymin = np.clip(ymin, 0, h - 1)
- ymax = np.clip(ymax, 0, h - 1)
- if xmax - xmin < self.min_crop_side_ratio * w or \
- ymax - ymin < self.min_crop_side_ratio * h:
- # area too small
- continue
- if polys.shape[0] != 0:
- poly_axis_in_area = (polys[:, :, 0] >= xmin)\
- & (polys[:, :, 0] <= xmax)\
- & (polys[:, :, 1] >= ymin)\
- & (polys[:, :, 1] <= ymax)
- selected_polys = np.where(
- np.sum(poly_axis_in_area, axis=1) == 4)[0]
- else:
- selected_polys = []
-
- if len(selected_polys) == 0:
- # no text in this area
- if crop_background:
- im = im[ymin:ymax + 1, xmin:xmax + 1, :]
- polys = []
- tags = []
- txts = []
- return im, polys, tags, txts
- else:
- continue
-
- im = im[ymin:ymax + 1, xmin:xmax + 1, :]
- polys = polys[selected_polys]
- tags = tags[selected_polys]
- txts_tmp = []
- for selected_poly in selected_polys:
- txts_tmp.append(txts[selected_poly])
- txts = txts_tmp
- polys[:, :, 0] -= xmin
- polys[:, :, 1] -= ymin
- return im, polys, tags, txts
- return im, polys, tags, txts
-
- def crop_background_infor(self, im, text_polys, text_tags, text_strs):
- im, text_polys, text_tags, text_strs = self.crop_area(
- im, text_polys, text_tags, text_strs, crop_background=True)
- if len(text_polys) > 0:
- return None
- # pad and resize image
- input_size = self.input_size
- im, ratio = self.preprocess(im)
- score_map = np.zeros((input_size, input_size), dtype=np.float32)
- geo_map = np.zeros((input_size, input_size, 9), dtype=np.float32)
- training_mask = np.ones((input_size, input_size), dtype=np.float32)
- return im, score_map, geo_map, training_mask
-
- def crop_foreground_infor(self, im, text_polys, text_tags, text_strs):
- im, text_polys, text_tags, text_strs = self.crop_area(
- im, text_polys, text_tags, text_strs, crop_background=False)
- if text_polys.shape[0] == 0:
- return None
- #continue for all ignore case
- if np.sum((text_tags * 1.0)) >= text_tags.size:
- return None
- # pad and resize image
- input_size = self.input_size
- im, ratio = self.preprocess(im)
- text_polys[:, :, 0] *= ratio
- text_polys[:, :, 1] *= ratio
- _, _, new_h, new_w = im.shape
- # print(im.shape)
- # self.draw_img_polys(im, text_polys)
- score_map, geo_map, training_mask = self.generate_quad(
- (new_h, new_w), text_polys, text_tags)
- return im, score_map, geo_map, training_mask
-
- def __call__(self, label_infor):
- infor = self.convert_label_infor(label_infor)
- im_path, text_polys, text_tags, text_strs = infor
- im = cv2.imread(im_path)
- if im is None:
- return None
- if text_polys.shape[0] == 0:
- return None
- #add rotate cases
- if np.random.rand() < 0.5:
- im, text_polys = self.rotate_im_poly(im, text_polys)
- h, w, _ = im.shape
- text_polys, text_tags = self.check_and_validate_polys(text_polys,
- text_tags, h, w)
- if text_polys.shape[0] == 0:
- return None
-
- # random scale this image
- rd_scale = np.random.choice(self.random_scale)
- im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
- text_polys *= rd_scale
- if np.random.rand() < self.background_ratio:
- outs = self.crop_background_infor(im, text_polys, text_tags,
- text_strs)
- else:
- outs = self.crop_foreground_infor(im, text_polys, text_tags,
- text_strs)
-
- if outs is None:
- return None
- im, score_map, geo_map, training_mask = outs
- score_map = score_map[np.newaxis, ::4, ::4].astype(np.float32)
- geo_map = np.swapaxes(geo_map, 1, 2)
- geo_map = np.swapaxes(geo_map, 1, 0)
- geo_map = geo_map[:, ::4, ::4].astype(np.float32)
- training_mask = training_mask[np.newaxis, ::4, ::4]
- training_mask = training_mask.astype(np.float32)
- return im, score_map, geo_map, training_mask
-
-
-class EASTProcessTest(object):
- def __init__(self, params):
- super(EASTProcessTest, self).__init__()
- self.resize_type = 0
- if 'test_image_shape' in params:
- self.image_shape = params['test_image_shape']
- # print(self.image_shape)
- self.resize_type = 1
- if 'max_side_len' in params:
- self.max_side_len = params['max_side_len']
- else:
- self.max_side_len = 2400
-
- def resize_image_type0(self, im):
- """
- resize the image to a size that is a multiple of 32, as required by the network
- args:
- img(array): array with shape [h, w, c]
- return(tuple):
- img, (ratio_h, ratio_w)
- """
- max_side_len = self.max_side_len
- h, w, _ = im.shape
-
- resize_w = w
- resize_h = h
-
- # limit the max side
- if max(resize_h, resize_w) > max_side_len:
- if resize_h > resize_w:
- ratio = float(max_side_len) / resize_h
- else:
- ratio = float(max_side_len) / resize_w
- else:
- ratio = 1.
- resize_h = int(resize_h * ratio)
- resize_w = int(resize_w * ratio)
- if resize_h % 32 == 0:
- resize_h = resize_h
- elif resize_h // 32 <= 1:
- resize_h = 32
- else:
- resize_h = (resize_h // 32 - 1) * 32
- if resize_w % 32 == 0:
- resize_w = resize_w
- elif resize_w // 32 <= 1:
- resize_w = 32
- else:
- resize_w = (resize_w // 32 - 1) * 32
- try:
- if int(resize_w) <= 0 or int(resize_h) <= 0:
- return None, (None, None)
- im = cv2.resize(im, (int(resize_w), int(resize_h)))
- except:
- print(im.shape, resize_w, resize_h)
- sys.exit(0)
- ratio_h = resize_h / float(h)
- ratio_w = resize_w / float(w)
- return im, (ratio_h, ratio_w)
-
- def resize_image_type1(self, im):
- resize_h, resize_w = self.image_shape
- ori_h, ori_w = im.shape[:2] # (h, w, c)
- im = cv2.resize(im, (int(resize_w), int(resize_h)))
- ratio_h = float(resize_h) / ori_h
- ratio_w = float(resize_w) / ori_w
- return im, (ratio_h, ratio_w)
-
- def __call__(self, im):
- if self.resize_type == 0:
- im, (ratio_h, ratio_w) = self.resize_image_type0(im)
- else:
- im, (ratio_h, ratio_w) = self.resize_image_type1(im)
- img_mean = [0.485, 0.456, 0.406]
- img_std = [0.229, 0.224, 0.225]
- im = im[:, :, ::-1].astype(np.float32)
- im = im / 255
- im -= img_mean
- im /= img_std
- im = im.transpose((2, 0, 1))
- im = im[np.newaxis, :]
- return [im, (ratio_h, ratio_w)]
diff --git a/ppocr/data/det/make_border_map.py b/ppocr/data/det/make_border_map.py
deleted file mode 100644
index 559416461b20f0de74daebe67772e56c23d55328..0000000000000000000000000000000000000000
--- a/ppocr/data/det/make_border_map.py
+++ /dev/null
@@ -1,147 +0,0 @@
-# -*- coding:utf-8 -*-
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-
-import numpy as np
-import cv2
-np.seterr(divide='ignore', invalid='ignore')
-import pyclipper
-from shapely.geometry import Polygon
-import sys
-import warnings
-warnings.simplefilter("ignore")
-
-
-def draw_border_map(polygon, canvas, mask, shrink_ratio):
- polygon = np.array(polygon)
- assert polygon.ndim == 2
- assert polygon.shape[1] == 2
-
- polygon_shape = Polygon(polygon)
- if polygon_shape.area <= 0:
- return
- distance = polygon_shape.area * (
- 1 - np.power(shrink_ratio, 2)) / polygon_shape.length
- subject = [tuple(l) for l in polygon]
- padding = pyclipper.PyclipperOffset()
- padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
-
- padded_polygon = np.array(padding.Execute(distance)[0])
- cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0)
-
- xmin = padded_polygon[:, 0].min()
- xmax = padded_polygon[:, 0].max()
- ymin = padded_polygon[:, 1].min()
- ymax = padded_polygon[:, 1].max()
- width = xmax - xmin + 1
- height = ymax - ymin + 1
-
- polygon[:, 0] = polygon[:, 0] - xmin
- polygon[:, 1] = polygon[:, 1] - ymin
-
- xs = np.broadcast_to(
- np.linspace(
- 0, width - 1, num=width).reshape(1, width), (height, width))
- ys = np.broadcast_to(
- np.linspace(
- 0, height - 1, num=height).reshape(height, 1), (height, width))
-
- distance_map = np.zeros((polygon.shape[0], height, width), dtype=np.float32)
- for i in range(polygon.shape[0]):
- j = (i + 1) % polygon.shape[0]
- absolute_distance = _distance(xs, ys, polygon[i], polygon[j])
- distance_map[i] = np.clip(absolute_distance / distance, 0, 1)
- distance_map = distance_map.min(axis=0)
-
- xmin_valid = min(max(0, xmin), canvas.shape[1] - 1)
- xmax_valid = min(max(0, xmax), canvas.shape[1] - 1)
- ymin_valid = min(max(0, ymin), canvas.shape[0] - 1)
- ymax_valid = min(max(0, ymax), canvas.shape[0] - 1)
- canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1] = np.fmax(
- 1 - distance_map[ymin_valid - ymin:ymax_valid - ymax + height,
- xmin_valid - xmin:xmax_valid - xmax + width],
- canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1])
-
-
-def _distance(xs, ys, point_1, point_2):
- '''
- compute the distance from point to a line
- ys: coordinates in the first axis
- xs: coordinates in the second axis
- point_1, point_2: (x, y), the end of the line
- '''
- height, width = xs.shape[:2]
- square_distance_1 = np.square(xs - point_1[0]) + np.square(ys - point_1[1])
- square_distance_2 = np.square(xs - point_2[0]) + np.square(ys - point_2[1])
- square_distance = np.square(point_1[0] - point_2[0]) + np.square(point_1[
- 1] - point_2[1])
-
- cosin = (square_distance - square_distance_1 - square_distance_2) / (
- 2 * np.sqrt(square_distance_1 * square_distance_2))
- square_sin = 1 - np.square(cosin)
- square_sin = np.nan_to_num(square_sin)
- result = np.sqrt(square_distance_1 * square_distance_2 * square_sin /
- square_distance)
-
- result[cosin <
- 0] = np.sqrt(np.fmin(square_distance_1, square_distance_2))[cosin <
- 0]
- # self.extend_line(point_1, point_2, result)
- return result
-
-
-def extend_line(point_1, point_2, result, shrink_ratio):
- ex_point_1 = (
- int(
- round(point_1[0] + (point_1[0] - point_2[0]) * (1 + shrink_ratio))),
- int(
- round(point_1[1] + (point_1[1] - point_2[1]) * (1 + shrink_ratio))))
- cv2.line(
- result,
- tuple(ex_point_1),
- tuple(point_1),
- 4096.0,
- 1,
- lineType=cv2.LINE_AA,
- shift=0)
- ex_point_2 = (
- int(
- round(point_2[0] + (point_2[0] - point_1[0]) * (1 + shrink_ratio))),
- int(
- round(point_2[1] + (point_2[1] - point_1[1]) * (1 + shrink_ratio))))
- cv2.line(
- result,
- tuple(ex_point_2),
- tuple(point_2),
- 4096.0,
- 1,
- lineType=cv2.LINE_AA,
- shift=0)
- return ex_point_1, ex_point_2
-
-
-def MakeBorderMap(data):
- shrink_ratio = 0.4
- thresh_min = 0.3
- thresh_max = 0.7
-
- im = data['image']
- text_polys = data['polys']
- ignore_tags = data['ignore_tags']
-
- canvas = np.zeros(im.shape[:2], dtype=np.float32)
- mask = np.zeros(im.shape[:2], dtype=np.float32)
-
- for i in range(len(text_polys)):
- if ignore_tags[i]:
- continue
- draw_border_map(
- text_polys[i], canvas, mask=mask, shrink_ratio=shrink_ratio)
- canvas = canvas * (thresh_max - thresh_min) + thresh_min
-
- data['threshold_map'] = canvas
- data['threshold_mask'] = mask
- return data
diff --git a/ppocr/data/det/make_shrink_map.py b/ppocr/data/det/make_shrink_map.py
deleted file mode 100644
index dec5c6f2ab9e5d7ef990f84177b24a37608851f2..0000000000000000000000000000000000000000
--- a/ppocr/data/det/make_shrink_map.py
+++ /dev/null
@@ -1,88 +0,0 @@
-# -*- coding:utf-8 -*-
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-
-import numpy as np
-import cv2
-from shapely.geometry import Polygon
-import pyclipper
-
-
-def validate_polygons(polygons, ignore_tags, h, w):
- '''
- polygons (numpy.array, required): of shape (num_instances, num_points, 2)
- '''
- if len(polygons) == 0:
- return polygons, ignore_tags
- assert len(polygons) == len(ignore_tags)
- for polygon in polygons:
- polygon[:, 0] = np.clip(polygon[:, 0], 0, w - 1)
- polygon[:, 1] = np.clip(polygon[:, 1], 0, h - 1)
-
- for i in range(len(polygons)):
- area = polygon_area(polygons[i])
- if abs(area) < 1:
- ignore_tags[i] = True
- if area > 0:
- polygons[i] = polygons[i][::-1, :]
- return polygons, ignore_tags
-
-
-def polygon_area(polygon):
- edge = 0
- for i in range(polygon.shape[0]):
- next_index = (i + 1) % polygon.shape[0]
- edge += (polygon[next_index, 0] - polygon[i, 0]) * (
- polygon[next_index, 1] - polygon[i, 1])
-
- return edge / 2.
-
-
-def MakeShrinkMap(data):
- min_text_size = 8
- shrink_ratio = 0.4
-
- image = data['image']
- text_polys = data['polys']
- ignore_tags = data['ignore_tags']
-
- h, w = image.shape[:2]
- text_polys, ignore_tags = validate_polygons(text_polys, ignore_tags, h, w)
- gt = np.zeros((h, w), dtype=np.float32)
- # gt = np.zeros((1, h, w), dtype=np.float32)
- mask = np.ones((h, w), dtype=np.float32)
- for i in range(len(text_polys)):
- polygon = text_polys[i]
- height = max(polygon[:, 1]) - min(polygon[:, 1])
- width = max(polygon[:, 0]) - min(polygon[:, 0])
- # height = min(np.linalg.norm(polygon[0] - polygon[3]),
- # np.linalg.norm(polygon[1] - polygon[2]))
- # width = min(np.linalg.norm(polygon[0] - polygon[1]),
- # np.linalg.norm(polygon[2] - polygon[3]))
- if ignore_tags[i] or min(height, width) < min_text_size:
- cv2.fillPoly(mask, polygon.astype(np.int32)[np.newaxis, :, :], 0)
- ignore_tags[i] = True
- else:
- polygon_shape = Polygon(polygon)
- distance = polygon_shape.area * (
- 1 - np.power(shrink_ratio, 2)) / polygon_shape.length
- subject = [tuple(l) for l in text_polys[i]]
- padding = pyclipper.PyclipperOffset()
- padding.AddPath(subject, pyclipper.JT_ROUND,
- pyclipper.ET_CLOSEDPOLYGON)
- shrinked = padding.Execute(-distance)
- if shrinked == []:
- cv2.fillPoly(mask,
- polygon.astype(np.int32)[np.newaxis, :, :], 0)
- ignore_tags[i] = True
- continue
- shrinked = np.array(shrinked[0]).reshape(-1, 2)
- cv2.fillPoly(gt, [shrinked.astype(np.int32)], 1)
- # cv2.fillPoly(gt[0], [shrinked.astype(np.int32)], 1)
-
- data['shrink_map'] = gt
- data['shrink_mask'] = mask
- return data
diff --git a/ppocr/data/det/random_crop_data.py b/ppocr/data/det/random_crop_data.py
deleted file mode 100644
index d0c081e785cb17282b5486c718446b97a580b6cc..0000000000000000000000000000000000000000
--- a/ppocr/data/det/random_crop_data.py
+++ /dev/null
@@ -1,155 +0,0 @@
-# -*- coding:utf-8 -*-
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-
-import numpy as np
-import cv2
-import random
-
-
-def is_poly_in_rect(poly, x, y, w, h):
- poly = np.array(poly)
- if poly[:, 0].min() < x or poly[:, 0].max() > x + w:
- return False
- if poly[:, 1].min() < y or poly[:, 1].max() > y + h:
- return False
- return True
-
-
-def is_poly_outside_rect(poly, x, y, w, h):
- poly = np.array(poly)
- if poly[:, 0].max() < x or poly[:, 0].min() > x + w:
- return True
- if poly[:, 1].max() < y or poly[:, 1].min() > y + h:
- return True
- return False
-
-
-def split_regions(axis):
- regions = []
- min_axis = 0
- for i in range(1, axis.shape[0]):
- if axis[i] != axis[i - 1] + 1:
- region = axis[min_axis:i]
- min_axis = i
- regions.append(region)
- return regions
-
-
-def random_select(axis, max_size):
- xx = np.random.choice(axis, size=2)
- xmin = np.min(xx)
- xmax = np.max(xx)
- xmin = np.clip(xmin, 0, max_size - 1)
- xmax = np.clip(xmax, 0, max_size - 1)
- return xmin, xmax
-
-
-def region_wise_random_select(regions, max_size):
- selected_index = list(np.random.choice(len(regions), 2))
- selected_values = []
- for index in selected_index:
- axis = regions[index]
- xx = int(np.random.choice(axis, size=1))
- selected_values.append(xx)
- xmin = min(selected_values)
- xmax = max(selected_values)
- return xmin, xmax
-
-
-def crop_area(im, text_polys, min_crop_side_ratio, max_tries):
- h, w, _ = im.shape
- h_array = np.zeros(h, dtype=np.int32)
- w_array = np.zeros(w, dtype=np.int32)
- for points in text_polys:
- points = np.round(points, decimals=0).astype(np.int32)
- minx = np.min(points[:, 0])
- maxx = np.max(points[:, 0])
- w_array[minx:maxx] = 1
- miny = np.min(points[:, 1])
- maxy = np.max(points[:, 1])
- h_array[miny:maxy] = 1
- # ensure the cropped region does not cross any text
- h_axis = np.where(h_array == 0)[0]
- w_axis = np.where(w_array == 0)[0]
-
- if len(h_axis) == 0 or len(w_axis) == 0:
- return 0, 0, w, h
-
- h_regions = split_regions(h_axis)
- w_regions = split_regions(w_axis)
-
- for i in range(max_tries):
- if len(w_regions) > 1:
- xmin, xmax = region_wise_random_select(w_regions, w)
- else:
- xmin, xmax = random_select(w_axis, w)
- if len(h_regions) > 1:
- ymin, ymax = region_wise_random_select(h_regions, h)
- else:
- ymin, ymax = random_select(h_axis, h)
-
- if xmax - xmin < min_crop_side_ratio * w or ymax - ymin < min_crop_side_ratio * h:
- # area too small
- continue
- num_poly_in_rect = 0
- for poly in text_polys:
- if not is_poly_outside_rect(poly, xmin, ymin, xmax - xmin,
- ymax - ymin):
- num_poly_in_rect += 1
- break
-
- if num_poly_in_rect > 0:
- return xmin, ymin, xmax - xmin, ymax - ymin
-
- return 0, 0, w, h
-
-
-def RandomCropData(data, size):
- max_tries = 10
- min_crop_side_ratio = 0.1
- require_original_image = False
- keep_ratio = True
-
- im = data['image']
- text_polys = data['polys']
- ignore_tags = data['ignore_tags']
- texts = data['texts']
- all_care_polys = [
- text_polys[i] for i, tag in enumerate(ignore_tags) if not tag
- ]
- # compute the crop region
- crop_x, crop_y, crop_w, crop_h = crop_area(im, all_care_polys,
- min_crop_side_ratio, max_tries)
- # crop the image and pad it while keeping the aspect ratio
- scale_w = size[0] / crop_w
- scale_h = size[1] / crop_h
- scale = min(scale_w, scale_h)
- h = int(crop_h * scale)
- w = int(crop_w * scale)
- if keep_ratio:
- padimg = np.zeros((size[1], size[0], im.shape[2]), im.dtype)
- padimg[:h, :w] = cv2.resize(
- im[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], (w, h))
- img = padimg
- else:
- img = cv2.resize(im[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w],
- tuple(size))
- # crop the text boxes
- text_polys_crop = []
- ignore_tags_crop = []
- texts_crop = []
- for poly, text, tag in zip(text_polys, texts, ignore_tags):
- poly = ((poly - (crop_x, crop_y)) * scale).tolist()
- if not is_poly_outside_rect(poly, 0, 0, w, h):
- text_polys_crop.append(poly)
- ignore_tags_crop.append(tag)
- texts_crop.append(text)
- data['image'] = img
- data['polys'] = np.array(text_polys_crop)
- data['ignore_tags'] = ignore_tags_crop
- data['texts'] = texts_crop
- return data
diff --git a/ppocr/data/det/sast_process.py b/ppocr/data/det/sast_process.py
deleted file mode 100644
index 74a848465f4cedb4a7007f61adc509d80885922c..0000000000000000000000000000000000000000
--- a/ppocr/data/det/sast_process.py
+++ /dev/null
@@ -1,781 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-import math
-import cv2
-import numpy as np
-import json
-
-
-class SASTProcessTrain(object):
- """
- SAST process function for training
- """
- def __init__(self, params):
- self.img_set_dir = params['img_set_dir']
- self.min_crop_side_ratio = params['min_crop_side_ratio']
- self.min_crop_size = params['min_crop_size']
- image_shape = params['image_shape']
- self.input_size = image_shape[1]
- self.min_text_size = params['min_text_size']
- self.max_text_size = params['max_text_size']
-
- def convert_label_infor(self, label_infor):
- label_infor = label_infor.decode()
- label_infor = label_infor.encode('utf-8').decode('utf-8-sig')
- substr = label_infor.strip("\n").split("\t")
- img_path = self.img_set_dir + substr[0]
- label = json.loads(substr[1])
- nBox = len(label)
- wordBBs, txts, txt_tags = [], [], []
- for bno in range(0, nBox):
- wordBB = label[bno]['points']
- txt = label[bno]['transcription']
- wordBBs.append(wordBB)
- txts.append(txt)
- if txt == '###':
- txt_tags.append(True)
- else:
- txt_tags.append(False)
- wordBBs = np.array(wordBBs, dtype=np.float32)
- txt_tags = np.array(txt_tags, dtype=np.bool)
- return img_path, wordBBs, txt_tags, txts
-
- def quad_area(self, poly):
- """
- compute area of a polygon
- :param poly:
- :return:
- """
- edge = [
- (poly[1][0] - poly[0][0]) * (poly[1][1] + poly[0][1]),
- (poly[2][0] - poly[1][0]) * (poly[2][1] + poly[1][1]),
- (poly[3][0] - poly[2][0]) * (poly[3][1] + poly[2][1]),
- (poly[0][0] - poly[3][0]) * (poly[0][1] + poly[3][1])
- ]
- return np.sum(edge) / 2.
-
- def gen_quad_from_poly(self, poly):
- """
- Generate min area quad from poly.
- """
- point_num = poly.shape[0]
- min_area_quad = np.zeros((4, 2), dtype=np.float32)
- if True:
- rect = cv2.minAreaRect(poly.astype(np.int32)) # (center (x,y), (width, height), angle of rotation)
- center_point = rect[0]
- box = np.array(cv2.boxPoints(rect))
-
- first_point_idx = 0
- min_dist = 1e4
- for i in range(4):
- dist = np.linalg.norm(box[(i + 0) % 4] - poly[0]) + \
- np.linalg.norm(box[(i + 1) % 4] - poly[point_num // 2 - 1]) + \
- np.linalg.norm(box[(i + 2) % 4] - poly[point_num // 2]) + \
- np.linalg.norm(box[(i + 3) % 4] - poly[-1])
- if dist < min_dist:
- min_dist = dist
- first_point_idx = i
- for i in range(4):
- min_area_quad[i] = box[(first_point_idx + i) % 4]
-
- return min_area_quad
-
- def check_and_validate_polys(self, polys, tags, im_size):
- """
- check that the text polys are in the same direction,
- and filter out invalid polygons
- :param polys:
- :param tags:
- :return:
- """
- (h, w) = im_size
- if polys.shape[0] == 0:
- return polys, np.array([]), np.array([])
- polys[:, :, 0] = np.clip(polys[:, :, 0], 0, w - 1)
- polys[:, :, 1] = np.clip(polys[:, :, 1], 0, h - 1)
-
- validated_polys = []
- validated_tags = []
- hv_tags = []
- for poly, tag in zip(polys, tags):
- quad = self.gen_quad_from_poly(poly)
- p_area = self.quad_area(quad)
- if abs(p_area) < 1:
- print('invalid poly')
- continue
- if p_area > 0:
- if tag == False:
- print('poly in wrong direction')
- tag = True # reversed cases should be ignored
- poly = poly[(0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1), :]
- quad = quad[(0, 3, 2, 1), :]
-
- len_w = np.linalg.norm(quad[0] - quad[1]) + np.linalg.norm(quad[3] - quad[2])
- len_h = np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[1] - quad[2])
- hv_tag = 1
-
- if len_w * 2.0 < len_h:
- hv_tag = 0
-
- validated_polys.append(poly)
- validated_tags.append(tag)
- hv_tags.append(hv_tag)
- return np.array(validated_polys), np.array(validated_tags), np.array(hv_tags)
-
- def crop_area(self, im, polys, tags, hv_tags, txts, crop_background=False, max_tries=25):
- """
- make random crop from the input image
- :param im:
- :param polys:
- :param tags:
- :param crop_background:
- :param max_tries: 50 -> 25
- :return:
- """
- h, w, _ = im.shape
- pad_h = h // 10
- pad_w = w // 10
- h_array = np.zeros((h + pad_h * 2), dtype=np.int32)
- w_array = np.zeros((w + pad_w * 2), dtype=np.int32)
- for poly in polys:
- poly = np.round(poly, decimals=0).astype(np.int32)
- minx = np.min(poly[:, 0])
- maxx = np.max(poly[:, 0])
- w_array[minx + pad_w: maxx + pad_w] = 1
- miny = np.min(poly[:, 1])
- maxy = np.max(poly[:, 1])
- h_array[miny + pad_h: maxy + pad_h] = 1
- # ensure the cropped region does not cross any text
- h_axis = np.where(h_array == 0)[0]
- w_axis = np.where(w_array == 0)[0]
- if len(h_axis) == 0 or len(w_axis) == 0:
- return im, polys, tags, hv_tags, txts
- for i in range(max_tries):
- xx = np.random.choice(w_axis, size=2)
- xmin = np.min(xx) - pad_w
- xmax = np.max(xx) - pad_w
- xmin = np.clip(xmin, 0, w - 1)
- xmax = np.clip(xmax, 0, w - 1)
- yy = np.random.choice(h_axis, size=2)
- ymin = np.min(yy) - pad_h
- ymax = np.max(yy) - pad_h
- ymin = np.clip(ymin, 0, h - 1)
- ymax = np.clip(ymax, 0, h - 1)
- # if xmax - xmin < ARGS.min_crop_side_ratio * w or \
- # ymax - ymin < ARGS.min_crop_side_ratio * h:
- if xmax - xmin < self.min_crop_size or \
- ymax - ymin < self.min_crop_size:
- # area too small
- continue
- if polys.shape[0] != 0:
- poly_axis_in_area = (polys[:, :, 0] >= xmin) & (polys[:, :, 0] <= xmax) \
- & (polys[:, :, 1] >= ymin) & (polys[:, :, 1] <= ymax)
- selected_polys = np.where(np.sum(poly_axis_in_area, axis=1) == 4)[0]
- else:
- selected_polys = []
- if len(selected_polys) == 0:
- # no text in this area
- if crop_background:
- txts_tmp = []
- for selected_poly in selected_polys:
- txts_tmp.append(txts[selected_poly])
- txts = txts_tmp
- return im[ymin : ymax + 1, xmin : xmax + 1, :], \
- polys[selected_polys], tags[selected_polys], hv_tags[selected_polys], txts
- else:
- continue
- im = im[ymin: ymax + 1, xmin: xmax + 1, :]
- polys = polys[selected_polys]
- tags = tags[selected_polys]
- hv_tags = hv_tags[selected_polys]
- txts_tmp = []
- for selected_poly in selected_polys:
- txts_tmp.append(txts[selected_poly])
- txts = txts_tmp
- polys[:, :, 0] -= xmin
- polys[:, :, 1] -= ymin
- return im, polys, tags, hv_tags, txts
-
- return im, polys, tags, hv_tags, txts
-
- def generate_direction_map(self, poly_quads, direction_map):
- """
- """
- width_list = []
- height_list = []
- for quad in poly_quads:
- quad_w = (np.linalg.norm(quad[0] - quad[1]) + np.linalg.norm(quad[2] - quad[3])) / 2.0
- quad_h = (np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[2] - quad[1])) / 2.0
- width_list.append(quad_w)
- height_list.append(quad_h)
- norm_width = max(sum(width_list) / (len(width_list) + 1e-6), 1.0)
- average_height = max(sum(height_list) / (len(height_list) + 1e-6), 1.0)
-
- for quad in poly_quads:
- direct_vector_full = ((quad[1] + quad[2]) - (quad[0] + quad[3])) / 2.0
- direct_vector = direct_vector_full / (np.linalg.norm(direct_vector_full) + 1e-6) * norm_width
- direction_label = tuple(map(float, [direct_vector[0], direct_vector[1], 1.0 / (average_height + 1e-6)]))
- cv2.fillPoly(direction_map, quad.round().astype(np.int32)[np.newaxis, :, :], direction_label)
- return direction_map
-
- def calculate_average_height(self, poly_quads):
- """
- """
- height_list = []
- for quad in poly_quads:
- quad_h = (np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[2] - quad[1])) / 2.0
- height_list.append(quad_h)
- average_height = max(sum(height_list) / len(height_list), 1.0)
- return average_height
-
- def generate_tcl_label(self, hw, polys, tags, ds_ratio,
- tcl_ratio=0.3, shrink_ratio_of_width=0.15):
- """
- Generate polygon.
- """
- h, w = hw
- h, w = int(h * ds_ratio), int(w * ds_ratio)
- polys = polys * ds_ratio
-
- score_map = np.zeros((h, w,), dtype=np.float32)
- tbo_map = np.zeros((h, w, 5), dtype=np.float32)
- training_mask = np.ones((h, w,), dtype=np.float32)
- direction_map = np.ones((h, w, 3)) * np.array([0, 0, 1]).reshape([1, 1, 3]).astype(np.float32)
-
- for poly_idx, poly_tag in enumerate(zip(polys, tags)):
- poly = poly_tag[0]
- tag = poly_tag[1]
-
- # generate min_area_quad
- min_area_quad, center_point = self.gen_min_area_quad_from_poly(poly)
- min_area_quad_h = 0.5 * (np.linalg.norm(min_area_quad[0] - min_area_quad[3]) +
- np.linalg.norm(min_area_quad[1] - min_area_quad[2]))
- min_area_quad_w = 0.5 * (np.linalg.norm(min_area_quad[0] - min_area_quad[1]) +
- np.linalg.norm(min_area_quad[2] - min_area_quad[3]))
-
- if min(min_area_quad_h, min_area_quad_w) < self.min_text_size * ds_ratio \
- or min(min_area_quad_h, min_area_quad_w) > self.max_text_size * ds_ratio:
- continue
-
- if tag:
- # continue
- cv2.fillPoly(training_mask, poly.astype(np.int32)[np.newaxis, :, :], 0.15)
- else:
- tcl_poly = self.poly2tcl(poly, tcl_ratio)
- tcl_quads = self.poly2quads(tcl_poly)
- poly_quads = self.poly2quads(poly)
- # stcl map
- stcl_quads, quad_index = self.shrink_poly_along_width(tcl_quads, shrink_ratio_of_width=shrink_ratio_of_width,
- expand_height_ratio=1.0 / tcl_ratio)
- # generate tcl map
- cv2.fillPoly(score_map, np.round(stcl_quads).astype(np.int32), 1.0)
-
- # generate tbo map
- for idx, quad in enumerate(stcl_quads):
- quad_mask = np.zeros((h, w), dtype=np.float32)
- quad_mask = cv2.fillPoly(quad_mask, np.round(quad[np.newaxis, :, :]).astype(np.int32), 1.0)
- tbo_map = self.gen_quad_tbo(poly_quads[quad_index[idx]], quad_mask, tbo_map)
- return score_map, tbo_map, training_mask
-
- def generate_tvo_and_tco(self, hw, polys, tags, tcl_ratio=0.3, ds_ratio=0.25):
- """
- Generate tcl map, tvo map and tbo map.
- """
- h, w = hw
- h, w = int(h * ds_ratio), int(w * ds_ratio)
- polys = polys * ds_ratio
- poly_mask = np.zeros((h, w), dtype=np.float32)
-
- tvo_map = np.ones((9, h, w), dtype=np.float32)
- tvo_map[0:-1:2] = np.tile(np.arange(0, w), (h, 1))
- tvo_map[1:-1:2] = np.tile(np.arange(0, w), (h, 1)).T
- poly_tv_xy_map = np.zeros((8, h, w), dtype=np.float32)
-
- # tco map
- tco_map = np.ones((3, h, w), dtype=np.float32)
- tco_map[0] = np.tile(np.arange(0, w), (h, 1))
- tco_map[1] = np.tile(np.arange(0, w), (h, 1)).T
- poly_tc_xy_map = np.zeros((2, h, w), dtype=np.float32)
-
- poly_short_edge_map = np.ones((h, w), dtype=np.float32)
-
- for poly, poly_tag in zip(polys, tags):
-
- if poly_tag == True:
- continue
-
- # adjust point order for vertical poly
- poly = self.adjust_point(poly)
-
- # generate min_area_quad
- min_area_quad, center_point = self.gen_min_area_quad_from_poly(poly)
- min_area_quad_h = 0.5 * (np.linalg.norm(min_area_quad[0] - min_area_quad[3]) +
- np.linalg.norm(min_area_quad[1] - min_area_quad[2]))
- min_area_quad_w = 0.5 * (np.linalg.norm(min_area_quad[0] - min_area_quad[1]) +
- np.linalg.norm(min_area_quad[2] - min_area_quad[3]))
-
- # generate tcl map and text, 128 * 128
- tcl_poly = self.poly2tcl(poly, tcl_ratio)
-
- # generate poly_tv_xy_map
- for idx in range(4):
- cv2.fillPoly(poly_tv_xy_map[2 * idx],
- np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32),
- float(min(max(min_area_quad[idx, 0], 0), w)))
- cv2.fillPoly(poly_tv_xy_map[2 * idx + 1],
- np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32),
- float(min(max(min_area_quad[idx, 1], 0), h)))
-
- # generate poly_tc_xy_map
- for idx in range(2):
- cv2.fillPoly(poly_tc_xy_map[idx],
- np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), float(center_point[idx]))
-
- # generate poly_short_edge_map
- cv2.fillPoly(poly_short_edge_map,
- np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32),
- float(max(min(min_area_quad_h, min_area_quad_w), 1.0)))
-
- # generate poly_mask and training_mask
- cv2.fillPoly(poly_mask, np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32), 1)
-
- tvo_map *= poly_mask
- tvo_map[:8] -= poly_tv_xy_map
- tvo_map[-1] /= poly_short_edge_map
- tvo_map = tvo_map.transpose((1, 2, 0))
-
- tco_map *= poly_mask
- tco_map[:2] -= poly_tc_xy_map
- tco_map[-1] /= poly_short_edge_map
- tco_map = tco_map.transpose((1, 2, 0))
-
- return tvo_map, tco_map
-
- def adjust_point(self, poly):
- """
- adjust point order.
- """
- point_num = poly.shape[0]
- if point_num == 4:
- len_1 = np.linalg.norm(poly[0] - poly[1])
- len_2 = np.linalg.norm(poly[1] - poly[2])
- len_3 = np.linalg.norm(poly[2] - poly[3])
- len_4 = np.linalg.norm(poly[3] - poly[0])
-
- if (len_1 + len_3) * 1.5 < (len_2 + len_4):
- poly = poly[[1, 2, 3, 0], :]
-
- elif point_num > 4:
- vector_1 = poly[0] - poly[1]
- vector_2 = poly[1] - poly[2]
- cos_theta = np.dot(vector_1, vector_2) / (np.linalg.norm(vector_1) * np.linalg.norm(vector_2) + 1e-6)
- theta = np.arccos(np.round(cos_theta, decimals=4))
-
- if abs(theta) > (70 / 180 * math.pi):
- index = list(range(1, point_num)) + [0]
- poly = poly[np.array(index), :]
- return poly
-
- def gen_min_area_quad_from_poly(self, poly):
- """
- Generate min area quad from poly.
- """
- point_num = poly.shape[0]
- min_area_quad = np.zeros((4, 2), dtype=np.float32)
- if point_num == 4:
- min_area_quad = poly
- center_point = np.sum(poly, axis=0) / 4
- else:
- rect = cv2.minAreaRect(poly.astype(np.int32)) # (center (x,y), (width, height), angle of rotation)
- center_point = rect[0]
- box = np.array(cv2.boxPoints(rect))
-
- first_point_idx = 0
- min_dist = 1e4
- for i in range(4):
- dist = np.linalg.norm(box[(i + 0) % 4] - poly[0]) + \
- np.linalg.norm(box[(i + 1) % 4] - poly[point_num // 2 - 1]) + \
- np.linalg.norm(box[(i + 2) % 4] - poly[point_num // 2]) + \
- np.linalg.norm(box[(i + 3) % 4] - poly[-1])
- if dist < min_dist:
- min_dist = dist
- first_point_idx = i
-
- for i in range(4):
- min_area_quad[i] = box[(first_point_idx + i) % 4]
-
- return min_area_quad, center_point
-
- def shrink_quad_along_width(self, quad, begin_width_ratio=0., end_width_ratio=1.):
- """
- Generate shrink_quad_along_width.
- """
- ratio_pair = np.array([[begin_width_ratio], [end_width_ratio]], dtype=np.float32)
- p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair
- p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair
- return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]])
-
- def shrink_poly_along_width(self, quads, shrink_ratio_of_width, expand_height_ratio=1.0):
- """
- shrink poly with given length.
- """
- upper_edge_list = []
-
- def get_cut_info(edge_len_list, cut_len):
- for idx, edge_len in enumerate(edge_len_list):
- cut_len -= edge_len
- if cut_len <= 0.000001:
- ratio = (cut_len + edge_len_list[idx]) / edge_len_list[idx]
- return idx, ratio
-
- for quad in quads:
- upper_edge_len = np.linalg.norm(quad[0] - quad[1])
- upper_edge_list.append(upper_edge_len)
-
- # length of left edge and right edge.
- left_length = np.linalg.norm(quads[0][0] - quads[0][3]) * expand_height_ratio
- right_length = np.linalg.norm(quads[-1][1] - quads[-1][2]) * expand_height_ratio
-
- shrink_length = min(left_length, right_length, sum(upper_edge_list)) * shrink_ratio_of_width
- # shrinking length
- upper_len_left = shrink_length
- upper_len_right = sum(upper_edge_list) - shrink_length
-
- left_idx, left_ratio = get_cut_info(upper_edge_list, upper_len_left)
- left_quad = self.shrink_quad_along_width(quads[left_idx], begin_width_ratio=left_ratio, end_width_ratio=1)
- right_idx, right_ratio = get_cut_info(upper_edge_list, upper_len_right)
- right_quad = self.shrink_quad_along_width(quads[right_idx], begin_width_ratio=0, end_width_ratio=right_ratio)
-
- out_quad_list = []
- if left_idx == right_idx:
- out_quad_list.append([left_quad[0], right_quad[1], right_quad[2], left_quad[3]])
- else:
- out_quad_list.append(left_quad)
- for idx in range(left_idx + 1, right_idx):
- out_quad_list.append(quads[idx])
- out_quad_list.append(right_quad)
-
- return np.array(out_quad_list), list(range(left_idx, right_idx + 1))
-
- def vector_angle(self, A, B):
- """
- Calculate the angle between vector AB and x-axis positive direction.
- """
- AB = np.array([B[1] - A[1], B[0] - A[0]])
- return np.arctan2(*AB)
-
- def theta_line_cross_point(self, theta, point):
- """
- Calculate the line through given point and angle in ax + by + c =0 form.
- """
- x, y = point
- cos = np.cos(theta)
- sin = np.sin(theta)
- return [sin, -cos, cos * y - sin * x]
-
- def line_cross_two_point(self, A, B):
- """
- Calculate the line through given point A and B in ax + by + c =0 form.
- """
- angle = self.vector_angle(A, B)
- return self.theta_line_cross_point(angle, A)
-
- def average_angle(self, poly):
- """
- Calculate the average angle between left and right edge in given poly.
- """
- p0, p1, p2, p3 = poly
- angle30 = self.vector_angle(p3, p0)
- angle21 = self.vector_angle(p2, p1)
- return (angle30 + angle21) / 2
-
- def line_cross_point(self, line1, line2):
- """
- line1 and line2 in 0=ax+by+c form, compute the cross point of line1 and line2
- """
- a1, b1, c1 = line1
- a2, b2, c2 = line2
- d = a1 * b2 - a2 * b1
-
- if d == 0:
- #print("line1", line1)
- #print("line2", line2)
- print('Cross point does not exist')
- return np.array([0, 0], dtype=np.float32)
- else:
- x = (b1 * c2 - b2 * c1) / d
- y = (a2 * c1 - a1 * c2) / d
-
- return np.array([x, y], dtype=np.float32)
-
- def quad2tcl(self, poly, ratio):
- """
- Generate center line by poly clock-wise point. (4, 2)
- """
- ratio_pair = np.array([[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32)
- p0_3 = poly[0] + (poly[3] - poly[0]) * ratio_pair
- p1_2 = poly[1] + (poly[2] - poly[1]) * ratio_pair
- return np.array([p0_3[0], p1_2[0], p1_2[1], p0_3[1]])
-
- def poly2tcl(self, poly, ratio):
- """
- Generate center line by poly clock-wise point.
- """
- ratio_pair = np.array([[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32)
- tcl_poly = np.zeros_like(poly)
- point_num = poly.shape[0]
-
- for idx in range(point_num // 2):
- point_pair = poly[idx] + (poly[point_num - 1 - idx] - poly[idx]) * ratio_pair
- tcl_poly[idx] = point_pair[0]
- tcl_poly[point_num - 1 - idx] = point_pair[1]
- return tcl_poly
-
- def gen_quad_tbo(self, quad, tcl_mask, tbo_map):
- """
- Generate tbo_map for give quad.
- """
- # upper and lower line function: ax + by + c = 0;
- up_line = self.line_cross_two_point(quad[0], quad[1])
- lower_line = self.line_cross_two_point(quad[3], quad[2])
-
- quad_h = 0.5 * (np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[1] - quad[2]))
- quad_w = 0.5 * (np.linalg.norm(quad[0] - quad[1]) + np.linalg.norm(quad[2] - quad[3]))
-
- # average angle of left and right line.
- angle = self.average_angle(quad)
-
- xy_in_poly = np.argwhere(tcl_mask == 1)
- for y, x in xy_in_poly:
- point = (x, y)
- line = self.theta_line_cross_point(angle, point)
- cross_point_upper = self.line_cross_point(up_line, line)
- cross_point_lower = self.line_cross_point(lower_line, line)
- ##FIX, offset reverse
- upper_offset_x, upper_offset_y = cross_point_upper - point
- lower_offset_x, lower_offset_y = cross_point_lower - point
- tbo_map[y, x, 0] = upper_offset_y
- tbo_map[y, x, 1] = upper_offset_x
- tbo_map[y, x, 2] = lower_offset_y
- tbo_map[y, x, 3] = lower_offset_x
- tbo_map[y, x, 4] = 1.0 / max(min(quad_h, quad_w), 1.0) * 2
- return tbo_map
-
- def poly2quads(self, poly):
- """
- Split poly into quads.
- """
- quad_list = []
- point_num = poly.shape[0]
-
- # point pair
- point_pair_list = []
- for idx in range(point_num // 2):
- point_pair = [poly[idx], poly[point_num - 1 - idx]]
- point_pair_list.append(point_pair)
-
- quad_num = point_num // 2 - 1
- for idx in range(quad_num):
- # reshape and adjust to clock-wise
- quad_list.append((np.array(point_pair_list)[[idx, idx + 1]]).reshape(4, 2)[[0, 2, 3, 1]])
-
- return np.array(quad_list)
-
- def extract_polys(self, poly_txt_path):
- """
- Read text_polys, txt_tags, txts from give txt file.
- """
- text_polys, txt_tags, txts = [], [], []
-
- with open(poly_txt_path) as f:
- for line in f.readlines():
- poly_str, txt = line.strip().split('\t')
- poly = map(float, poly_str.split(','))
- text_polys.append(np.array(poly, dtype=np.float32).reshape(-1, 2))
- txts.append(txt)
- if txt == '###':
- txt_tags.append(True)
- else:
- txt_tags.append(False)
-
- return np.array(map(np.array, text_polys)), \
- np.array(txt_tags, dtype=np.bool), txts
-
- def __call__(self, label_infor):
- infor = self.convert_label_infor(label_infor)
- im_path, text_polys, text_tags, text_strs = infor
- im = cv2.imread(im_path)
- if im is None:
- return None
- if text_polys.shape[0] == 0:
- return None
-
- h, w, _ = im.shape
- text_polys, text_tags, hv_tags = self.check_and_validate_polys(text_polys, text_tags, (h, w))
-
- if text_polys.shape[0] == 0:
- return None
-
- #set aspect ratio and keep area fix
- asp_scales = np.arange(1.0, 1.55, 0.1)
- asp_scale = np.random.choice(asp_scales)
-
- if np.random.rand() < 0.5:
- asp_scale = 1.0 / asp_scale
- asp_scale = math.sqrt(asp_scale)
-
- asp_wx = asp_scale
- asp_hy = 1.0 / asp_scale
- im = cv2.resize(im, dsize=None, fx=asp_wx, fy=asp_hy)
- text_polys[:, :, 0] *= asp_wx
- text_polys[:, :, 1] *= asp_hy
-
- h, w, _ = im.shape
- if max(h, w) > 2048:
- rd_scale = 2048.0 / max(h, w)
- im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
- text_polys *= rd_scale
- h, w, _ = im.shape
- if min(h, w) < 16:
- return None
-
- #no background
- im, text_polys, text_tags, hv_tags, text_strs = self.crop_area(im, \
- text_polys, text_tags, hv_tags, text_strs, crop_background=False)
- if text_polys.shape[0] == 0:
- return None
- #continue for all ignore case
- if np.sum((text_tags * 1.0)) >= text_tags.size:
- return None
- new_h, new_w, _ = im.shape
- if (new_h is None) or (new_w is None):
- return None
- #resize image
- std_ratio = float(self.input_size) / max(new_w, new_h)
- rand_scales = np.array([0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1.0, 1.0, 1.0, 1.0, 1.0])
- rz_scale = std_ratio * np.random.choice(rand_scales)
- im = cv2.resize(im, dsize=None, fx=rz_scale, fy=rz_scale)
- text_polys[:, :, 0] *= rz_scale
- text_polys[:, :, 1] *= rz_scale
-
- #add gaussian blur
- if np.random.rand() < 0.1 * 0.5:
- ks = np.random.permutation(5)[0] + 1
- ks = int(ks/2)*2 + 1
- im = cv2.GaussianBlur(im, ksize=(ks, ks), sigmaX=0, sigmaY=0)
- #add brighter
- if np.random.rand() < 0.1 * 0.5:
- im = im * (1.0 + np.random.rand() * 0.5)
- im = np.clip(im, 0.0, 255.0)
- #add darker
- if np.random.rand() < 0.1 * 0.5:
- im = im * (1.0 - np.random.rand() * 0.5)
- im = np.clip(im, 0.0, 255.0)
-
- # Padding the im to [input_size, input_size]
- new_h, new_w, _ = im.shape
- if min(new_w, new_h) < self.input_size * 0.5:
- return None
-
- im_padded = np.ones((self.input_size, self.input_size, 3), dtype=np.float32)
- im_padded[:, :, 2] = 0.485 * 255
- im_padded[:, :, 1] = 0.456 * 255
- im_padded[:, :, 0] = 0.406 * 255
-
- # Random the start position
- del_h = self.input_size - new_h
- del_w = self.input_size - new_w
- sh, sw = 0, 0
- if del_h > 1:
- sh = int(np.random.rand() * del_h)
- if del_w > 1:
- sw = int(np.random.rand() * del_w)
-
- # Padding
- im_padded[sh: sh + new_h, sw: sw + new_w, :] = im.copy()
- text_polys[:, :, 0] += sw
- text_polys[:, :, 1] += sh
-
- score_map, border_map, training_mask = self.generate_tcl_label((self.input_size, self.input_size),
- text_polys, text_tags, 0.25)
-
- # SAST head
- tvo_map, tco_map = self.generate_tvo_and_tco((self.input_size, self.input_size), text_polys, text_tags, tcl_ratio=0.3, ds_ratio=0.25)
- # print("test--------tvo_map shape:", tvo_map.shape)
-
- im_padded[:, :, 2] -= 0.485 * 255
- im_padded[:, :, 1] -= 0.456 * 255
- im_padded[:, :, 0] -= 0.406 * 255
- im_padded[:, :, 2] /= (255.0 * 0.229)
- im_padded[:, :, 1] /= (255.0 * 0.224)
- im_padded[:, :, 0] /= (255.0 * 0.225)
- im_padded = im_padded.transpose((2, 0, 1))
-
- return im_padded[::-1, :, :], score_map[np.newaxis, :, :], border_map.transpose((2, 0, 1)), training_mask[np.newaxis, :, :], tvo_map.transpose((2, 0, 1)), tco_map.transpose((2, 0, 1))
-
-
-class SASTProcessTest(object):
- """
- SAST process function for test
- """
- def __init__(self, params):
- super(SASTProcessTest, self).__init__()
- if 'max_side_len' in params:
- self.max_side_len = params['max_side_len']
- else:
- self.max_side_len = 2400
-
- def resize_image(self, im):
- """
- resize image to a size multiple of max_stride which is required by the network
- :param im: the resized image
- :param max_side_len: limit of max image size to avoid out of memory in gpu
- :return: the resized image and the resize ratio
- """
- h, w, _ = im.shape
-
- resize_w = w
- resize_h = h
-
- # Fix the longer side
- if resize_h > resize_w:
- ratio = float(self.max_side_len) / resize_h
- else:
- ratio = float(self.max_side_len) / resize_w
-
- resize_h = int(resize_h * ratio)
- resize_w = int(resize_w * ratio)
-
- max_stride = 128
- resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
- resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
- im = cv2.resize(im, (int(resize_w), int(resize_h)))
- ratio_h = resize_h / float(h)
- ratio_w = resize_w / float(w)
-
- return im, (ratio_h, ratio_w)
-
- def __call__(self, im):
- src_h, src_w, _ = im.shape
- im, (ratio_h, ratio_w) = self.resize_image(im)
- img_mean = [0.485, 0.456, 0.406]
- img_std = [0.229, 0.224, 0.225]
- im = im[:, :, ::-1].astype(np.float32)
- im = im / 255
- im -= img_mean
- im /= img_std
- im = im.transpose((2, 0, 1))
- im = im[np.newaxis, :]
- return [im, (ratio_h, ratio_w, src_h, src_w)]
diff --git a/ppocr/data/imaug/__init__.py b/ppocr/data/imaug/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..047664bffc74bb574e4d4c98380b503c05b9d18b
--- /dev/null
+++ b/ppocr/data/imaug/__init__.py
@@ -0,0 +1,59 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from .iaa_augment import IaaAugment
+from .make_border_map import MakeBorderMap
+from .make_shrink_map import MakeShrinkMap
+from .random_crop_data import EastRandomCropData, PSERandomCrop
+
+from .rec_img_aug import RecAug, RecResizeImg
+
+from .operators import *
+from .label_ops import *
+
+
+def transform(data, ops=None):
+ """ transform """
+ if ops is None:
+ ops = []
+ for op in ops:
+ data = op(data)
+ if data is None:
+ return None
+ return data
+
+
+def create_operators(op_param_list, global_config=None):
+ """
+ create operators based on the config
+
+ Args:
+        op_param_list(list): a dict list, used to create some operators
+        global_config(dict): optional global params, merged into each operator's params
+ """
+ assert isinstance(op_param_list, list), ('operator config should be a list')
+ ops = []
+ for operator in op_param_list:
+        assert isinstance(operator, dict) and len(operator) == 1, \
+            "yaml format error"
+ op_name = list(operator)[0]
+ param = {} if operator[op_name] is None else operator[op_name]
+ if global_config is not None:
+ param.update(global_config)
+ op = eval(op_name)(**param)
+ ops.append(op)
+ return ops
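
For context, a minimal sketch (not part of the diff) of how these two helpers are driven from a YAML-style transform list; the operator names mirror the classes added in this PR, while the image path and label JSON are placeholders, and IaaAugment additionally requires the imgaug package:

```python
from ppocr.data.imaug import create_operators, transform

# Each entry is a single-key dict {OpName: params}, matching the
# "yaml format error" assert above; None means "no params".
op_param_list = [
    {'DecodeImage': {'img_mode': 'BGR', 'channel_first': False}},
    {'DetLabelEncode': None},
    {'IaaAugment': None},
    {'KeepKeys': {'keep_keys': ['image', 'polys', 'ignore_tags']}},
]
ops = create_operators(op_param_list)

data = {
    'image': open('demo.jpg', 'rb').read(),  # placeholder path
    'label': '[{"points": [[0, 0], [10, 0], [10, 10], [0, 10]], '
             '"transcription": "text"}]',
}
out = transform(data, ops)  # None if any op rejects the sample
```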
diff --git a/ppocr/data/imaug/iaa_augment.py b/ppocr/data/imaug/iaa_augment.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ce6bd4209034389df04334a83717142ca8c7b40
--- /dev/null
+++ b/ppocr/data/imaug/iaa_augment.py
@@ -0,0 +1,101 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import numpy as np
+import imgaug
+import imgaug.augmenters as iaa
+
+
+class AugmenterBuilder(object):
+ def __init__(self):
+ pass
+
+ def build(self, args, root=True):
+ if args is None or len(args) == 0:
+ return None
+ elif isinstance(args, list):
+ if root:
+ sequence = [self.build(value, root=False) for value in args]
+ return iaa.Sequential(sequence)
+ else:
+ return getattr(iaa, args[0])(
+ *[self.to_tuple_if_list(a) for a in args[1:]])
+ elif isinstance(args, dict):
+ cls = getattr(iaa, args['type'])
+ return cls(**{
+ k: self.to_tuple_if_list(v)
+ for k, v in args['args'].items()
+ })
+ else:
+ raise RuntimeError('unknown augmenter arg: ' + str(args))
+
+ def to_tuple_if_list(self, obj):
+ if isinstance(obj, list):
+ return tuple(obj)
+ return obj
+
+
+class IaaAugment():
+ def __init__(self, augmenter_args=None, **kwargs):
+ if augmenter_args is None:
+ augmenter_args = [{
+ 'type': 'Fliplr',
+ 'args': {
+ 'p': 0.5
+ }
+ }, {
+ 'type': 'Affine',
+ 'args': {
+ 'rotate': [-10, 10]
+ }
+ }, {
+ 'type': 'Resize',
+ 'args': {
+ 'size': [0.5, 3]
+ }
+ }]
+ self.augmenter = AugmenterBuilder().build(augmenter_args)
+
+ def __call__(self, data):
+ image = data['image']
+ shape = image.shape
+
+ if self.augmenter:
+ aug = self.augmenter.to_deterministic()
+ data['image'] = aug.augment_image(image)
+ data = self.may_augment_annotation(aug, data, shape)
+ return data
+
+ def may_augment_annotation(self, aug, data, shape):
+ if aug is None:
+ return data
+
+ line_polys = []
+ for poly in data['polys']:
+ new_poly = self.may_augment_poly(aug, shape, poly)
+ line_polys.append(new_poly)
+ data['polys'] = np.array(line_polys)
+ return data
+
+ def may_augment_poly(self, aug, img_shape, poly):
+ keypoints = [imgaug.Keypoint(p[0], p[1]) for p in poly]
+ keypoints = aug.augment_keypoints(
+ [imgaug.KeypointsOnImage(
+ keypoints, shape=img_shape)])[0].keypoints
+ poly = [(p.x, p.y) for p in keypoints]
+ return poly
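
A quick illustrative sketch (assumes imgaug is installed): the default IaaAugment pipeline is applied to a synthetic sample, and the polygons go through the same deterministic augmenter as the image so they stay aligned with it:

```python
import numpy as np
from ppocr.data.imaug.iaa_augment import IaaAugment

aug = IaaAugment()  # default Fliplr / Affine / Resize args
data = {
    'image': np.zeros((100, 200, 3), dtype=np.uint8),
    'polys': np.array([[[10, 10], [50, 10], [50, 40], [10, 40]]],
                      dtype=np.float32),
}
data = aug(data)
print(data['image'].shape, data['polys'].shape)
```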
diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..72038c8f8f7550bec4597e38cc11e511100a072e
--- /dev/null
+++ b/ppocr/data/imaug/label_ops.py
@@ -0,0 +1,197 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import numpy as np
+from ppocr.utils.logging import get_logger
+
+
+class DetLabelEncode(object):
+ def __init__(self, **kwargs):
+ pass
+
+ def __call__(self, data):
+ import json
+ label = data['label']
+ label = json.loads(label)
+ nBox = len(label)
+ boxes, txts, txt_tags = [], [], []
+ for bno in range(0, nBox):
+ box = label[bno]['points']
+ txt = label[bno]['transcription']
+ boxes.append(box)
+ txts.append(txt)
+ if txt in ['*', '###']:
+ txt_tags.append(True)
+ else:
+ txt_tags.append(False)
+ boxes = np.array(boxes, dtype=np.float32)
+ txt_tags = np.array(txt_tags, dtype=np.bool)
+
+ data['polys'] = boxes
+ data['texts'] = txts
+ data['ignore_tags'] = txt_tags
+ return data
+
+ def order_points_clockwise(self, pts):
+ rect = np.zeros((4, 2), dtype="float32")
+ s = pts.sum(axis=1)
+ rect[0] = pts[np.argmin(s)]
+ rect[2] = pts[np.argmax(s)]
+ diff = np.diff(pts, axis=1)
+ rect[1] = pts[np.argmin(diff)]
+ rect[3] = pts[np.argmax(diff)]
+ return rect
+
+
+class BaseRecLabelEncode(object):
+ """ Convert between text-label and text-index """
+
+ def __init__(self,
+ max_text_length,
+ character_dict_path=None,
+ character_type='ch',
+ use_space_char=False):
+ support_character_type = ['ch', 'en', 'en_sensitive']
+        assert character_type in support_character_type, "Only {} are supported now but got {}".format(
+            support_character_type, character_type)
+
+ self.max_text_len = max_text_length
+ if character_type == "en":
+ self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
+ dict_character = list(self.character_str)
+ elif character_type == "ch":
+ self.character_str = ""
+ assert character_dict_path is not None, "character_dict_path should not be None when character_type is ch"
+ with open(character_dict_path, "rb") as fin:
+ lines = fin.readlines()
+ for line in lines:
+ line = line.decode('utf-8').strip("\n").strip("\r\n")
+ self.character_str += line
+ if use_space_char:
+ self.character_str += " "
+ dict_character = list(self.character_str)
+ elif character_type == "en_sensitive":
+ # same with ASTER setting (use 94 char).
+ import string
+ self.character_str = string.printable[:-6]
+ dict_character = list(self.character_str)
+ self.character_type = character_type
+ dict_character = self.add_special_char(dict_character)
+ self.dict = {}
+ for i, char in enumerate(dict_character):
+ self.dict[char] = i
+ self.character = dict_character
+
+ def add_special_char(self, dict_character):
+ return dict_character
+
+ def encode(self, text):
+ """convert text-label into text-index.
+ input:
+ text: text labels of each image. [batch_size]
+
+ output:
+ text: concatenated text index for CTCLoss.
+ [sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)]
+ length: length of each text. [batch_size]
+ """
+ if len(text) > self.max_text_len:
+ return None
+ if self.character_type == "en":
+ text = text.lower()
+ text_list = []
+ for char in text:
+ if char not in self.dict:
+ # logger = get_logger()
+ # logger.warning('{} is not in dict'.format(char))
+ continue
+ text_list.append(self.dict[char])
+ if len(text_list) == 0:
+ return None
+ return text_list
+
+ def get_ignored_tokens(self):
+ return [0] # for ctc blank
+
+
+class CTCLabelEncode(BaseRecLabelEncode):
+ """ Convert between text-label and text-index """
+
+ def __init__(self,
+ max_text_length,
+ character_dict_path=None,
+ character_type='ch',
+ use_space_char=False,
+ **kwargs):
+ super(CTCLabelEncode,
+ self).__init__(max_text_length, character_dict_path,
+ character_type, use_space_char)
+
+ def __call__(self, data):
+ text = data['label']
+ text = self.encode(text)
+ if text is None:
+ return None
+ data['length'] = np.array(len(text))
+ text = text + [0] * (self.max_text_len - len(text))
+ data['label'] = np.array(text)
+ return data
+
+ def add_special_char(self, dict_character):
+ dict_character = ['blank'] + dict_character
+ return dict_character
+
+
+class AttnLabelEncode(BaseRecLabelEncode):
+ """ Convert between text-label and text-index """
+
+ def __init__(self,
+ max_text_length,
+ character_dict_path=None,
+ character_type='ch',
+ use_space_char=False,
+ **kwargs):
+ super(AttnLabelEncode,
+ self).__init__(max_text_length, character_dict_path,
+ character_type, use_space_char)
+ self.beg_str = "sos"
+ self.end_str = "eos"
+
+ def add_special_char(self, dict_character):
+ dict_character = [self.beg_str, self.end_str] + dict_character
+ return dict_character
+
+ def __call__(self, text):
+ text = self.encode(text)
+ return text
+
+ def get_ignored_tokens(self):
+ beg_idx = self.get_beg_end_flag_idx("beg")
+ end_idx = self.get_beg_end_flag_idx("end")
+ return [beg_idx, end_idx]
+
+ def get_beg_end_flag_idx(self, beg_or_end):
+ if beg_or_end == "beg":
+ idx = np.array(self.dict[self.beg_str])
+ elif beg_or_end == "end":
+ idx = np.array(self.dict[self.end_str])
+ else:
+ assert False, "Unsupport type %s in get_beg_end_flag_idx" \
+ % beg_or_end
+ return idx
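
A small usage sketch (not from the diff): with character_type='en' no dictionary file is needed, 'blank' takes index 0 for CTC, and labels are lower-cased and right-padded with the blank index up to max_text_length:

```python
import numpy as np
from ppocr.data.imaug.label_ops import CTCLabelEncode

encoder = CTCLabelEncode(max_text_length=25, character_type='en')
data = encoder({'label': 'Hello'})
print(data['length'])     # 5
print(data['label'][:6])  # 5 char indices followed by the 0 (blank) padding
```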
diff --git a/ppocr/data/imaug/make_border_map.py b/ppocr/data/imaug/make_border_map.py
new file mode 100644
index 0000000000000000000000000000000000000000..df53e04de1f693adbdcd732d248063aab35f34f4
--- /dev/null
+++ b/ppocr/data/imaug/make_border_map.py
@@ -0,0 +1,157 @@
+# -*- coding:utf-8 -*-
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import numpy as np
+import cv2
+
+np.seterr(divide='ignore', invalid='ignore')
+import pyclipper
+from shapely.geometry import Polygon
+import sys
+import warnings
+
+warnings.simplefilter("ignore")
+
+__all__ = ['MakeBorderMap']
+
+
+class MakeBorderMap(object):
+ def __init__(self,
+ shrink_ratio=0.4,
+ thresh_min=0.3,
+ thresh_max=0.7,
+ **kwargs):
+ self.shrink_ratio = shrink_ratio
+ self.thresh_min = thresh_min
+ self.thresh_max = thresh_max
+
+ def __call__(self, data: dict) -> dict:
+
+ img = data['image']
+ text_polys = data['polys']
+ ignore_tags = data['ignore_tags']
+
+ canvas = np.zeros(img.shape[:2], dtype=np.float32)
+ mask = np.zeros(img.shape[:2], dtype=np.float32)
+
+ for i in range(len(text_polys)):
+ if ignore_tags[i]:
+ continue
+ self.draw_border_map(text_polys[i], canvas, mask=mask)
+ canvas = canvas * (self.thresh_max - self.thresh_min) + self.thresh_min
+
+ data['threshold_map'] = canvas
+ data['threshold_mask'] = mask
+ return data
+
+ def draw_border_map(self, polygon, canvas, mask):
+ polygon = np.array(polygon)
+ assert polygon.ndim == 2
+ assert polygon.shape[1] == 2
+
+ polygon_shape = Polygon(polygon)
+ if polygon_shape.area <= 0:
+ return
+ distance = polygon_shape.area * (
+ 1 - np.power(self.shrink_ratio, 2)) / polygon_shape.length
+ subject = [tuple(l) for l in polygon]
+ padding = pyclipper.PyclipperOffset()
+ padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
+
+ padded_polygon = np.array(padding.Execute(distance)[0])
+ cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0)
+
+ xmin = padded_polygon[:, 0].min()
+ xmax = padded_polygon[:, 0].max()
+ ymin = padded_polygon[:, 1].min()
+ ymax = padded_polygon[:, 1].max()
+ width = xmax - xmin + 1
+ height = ymax - ymin + 1
+
+ polygon[:, 0] = polygon[:, 0] - xmin
+ polygon[:, 1] = polygon[:, 1] - ymin
+
+ xs = np.broadcast_to(
+ np.linspace(
+ 0, width - 1, num=width).reshape(1, width), (height, width))
+ ys = np.broadcast_to(
+ np.linspace(
+ 0, height - 1, num=height).reshape(height, 1), (height, width))
+
+ distance_map = np.zeros(
+ (polygon.shape[0], height, width), dtype=np.float32)
+ for i in range(polygon.shape[0]):
+ j = (i + 1) % polygon.shape[0]
+ absolute_distance = self._distance(xs, ys, polygon[i], polygon[j])
+ distance_map[i] = np.clip(absolute_distance / distance, 0, 1)
+ distance_map = distance_map.min(axis=0)
+
+ xmin_valid = min(max(0, xmin), canvas.shape[1] - 1)
+ xmax_valid = min(max(0, xmax), canvas.shape[1] - 1)
+ ymin_valid = min(max(0, ymin), canvas.shape[0] - 1)
+ ymax_valid = min(max(0, ymax), canvas.shape[0] - 1)
+ canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1] = np.fmax(
+ 1 - distance_map[ymin_valid - ymin:ymax_valid - ymax + height,
+ xmin_valid - xmin:xmax_valid - xmax + width],
+ canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1])
+
+ def _distance(self, xs, ys, point_1, point_2):
+        '''
+        compute the distance from each grid point to a line segment
+        ys: coordinates in the first axis
+        xs: coordinates in the second axis
+        point_1, point_2: (x, y), the two endpoints of the segment
+        '''
+        height, width = xs.shape[:2]
+        square_distance_1 = np.square(xs - point_1[0]) + np.square(
+            ys - point_1[1])
+        square_distance_2 = np.square(xs - point_2[0]) + np.square(
+            ys - point_2[1])
+ square_distance = np.square(point_1[0] - point_2[0]) + np.square(
+ point_1[1] - point_2[1])
+
+ cosin = (square_distance - square_distance_1 - square_distance_2) / (
+ 2 * np.sqrt(square_distance_1 * square_distance_2))
+ square_sin = 1 - np.square(cosin)
+ square_sin = np.nan_to_num(square_sin)
+ result = np.sqrt(square_distance_1 * square_distance_2 * square_sin /
+ square_distance)
+
+        result[cosin < 0] = np.sqrt(
+            np.fmin(square_distance_1, square_distance_2))[cosin < 0]
+ # self.extend_line(point_1, point_2, result)
+ return result
+
+ def extend_line(self, point_1, point_2, result, shrink_ratio):
+ ex_point_1 = (int(
+ round(point_1[0] + (point_1[0] - point_2[0]) * (1 + shrink_ratio))),
+ int(
+ round(point_1[1] + (point_1[1] - point_2[1]) * (
+ 1 + shrink_ratio))))
+ cv2.line(
+ result,
+ tuple(ex_point_1),
+ tuple(point_1),
+ 4096.0,
+ 1,
+ lineType=cv2.LINE_AA,
+ shift=0)
+ ex_point_2 = (int(
+ round(point_2[0] + (point_2[0] - point_1[0]) * (1 + shrink_ratio))),
+ int(
+ round(point_2[1] + (point_2[1] - point_1[1]) * (
+ 1 + shrink_ratio))))
+ cv2.line(
+ result,
+ tuple(ex_point_2),
+ tuple(point_2),
+ 4096.0,
+ 1,
+ lineType=cv2.LINE_AA,
+ shift=0)
+ return ex_point_1, ex_point_2
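
Rough usage sketch for MakeBorderMap on a toy rectangle (shapely and pyclipper assumed installed). The polygon is dilated outward by D = area * (1 - shrink_ratio^2) / perimeter, and threshold_map ramps from thresh_min far from the text boundary up to thresh_max on it, following DB's label generation:

```python
import numpy as np
from ppocr.data.imaug.make_border_map import MakeBorderMap

data = {
    'image': np.zeros((120, 160, 3), dtype=np.uint8),
    'polys': np.array([[[20, 20], [120, 20], [120, 60], [20, 60]]],
                      dtype=np.float32),
    'ignore_tags': [False],
}
data = MakeBorderMap(shrink_ratio=0.4)(data)
print(data['threshold_map'].min(), data['threshold_map'].max())  # ~0.3, ~0.7
```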
diff --git a/ppocr/data/imaug/make_shrink_map.py b/ppocr/data/imaug/make_shrink_map.py
new file mode 100644
index 0000000000000000000000000000000000000000..a66706f2bff25680a3597ef67ce5fa809747cf05
--- /dev/null
+++ b/ppocr/data/imaug/make_shrink_map.py
@@ -0,0 +1,94 @@
+# -*- coding:utf-8 -*-
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import numpy as np
+import cv2
+from shapely.geometry import Polygon
+import pyclipper
+
+__all__ = ['MakeShrinkMap']
+
+
+class MakeShrinkMap(object):
+ r'''
+    Make a binary shrink mask from detection data in ICDAR format.
+    Typically runs after the label decoding step (e.g. `DetLabelEncode`).
+ '''
+
+ def __init__(self, min_text_size=8, shrink_ratio=0.4, **kwargs):
+ self.min_text_size = min_text_size
+ self.shrink_ratio = shrink_ratio
+
+ def __call__(self, data):
+ image = data['image']
+ text_polys = data['polys']
+ ignore_tags = data['ignore_tags']
+
+ h, w = image.shape[:2]
+ text_polys, ignore_tags = self.validate_polygons(text_polys,
+ ignore_tags, h, w)
+ gt = np.zeros((h, w), dtype=np.float32)
+ # gt = np.zeros((1, h, w), dtype=np.float32)
+ mask = np.ones((h, w), dtype=np.float32)
+ for i in range(len(text_polys)):
+ polygon = text_polys[i]
+ height = max(polygon[:, 1]) - min(polygon[:, 1])
+ width = max(polygon[:, 0]) - min(polygon[:, 0])
+ if ignore_tags[i] or min(height, width) < self.min_text_size:
+ cv2.fillPoly(mask,
+ polygon.astype(np.int32)[np.newaxis, :, :], 0)
+ ignore_tags[i] = True
+ else:
+ polygon_shape = Polygon(polygon)
+ distance = polygon_shape.area * (
+ 1 - np.power(self.shrink_ratio, 2)) / polygon_shape.length
+ subject = [tuple(l) for l in text_polys[i]]
+ padding = pyclipper.PyclipperOffset()
+ padding.AddPath(subject, pyclipper.JT_ROUND,
+ pyclipper.ET_CLOSEDPOLYGON)
+ shrinked = padding.Execute(-distance)
+ if shrinked == []:
+ cv2.fillPoly(mask,
+ polygon.astype(np.int32)[np.newaxis, :, :], 0)
+ ignore_tags[i] = True
+ continue
+ shrinked = np.array(shrinked[0]).reshape(-1, 2)
+ cv2.fillPoly(gt, [shrinked.astype(np.int32)], 1)
+ # cv2.fillPoly(gt[0], [shrinked.astype(np.int32)], 1)
+
+ data['shrink_map'] = gt
+ data['shrink_mask'] = mask
+ return data
+
+ def validate_polygons(self, polygons, ignore_tags, h, w):
+ '''
+ polygons (numpy.array, required): of shape (num_instances, num_points, 2)
+ '''
+ if len(polygons) == 0:
+ return polygons, ignore_tags
+ assert len(polygons) == len(ignore_tags)
+ for polygon in polygons:
+ polygon[:, 0] = np.clip(polygon[:, 0], 0, w - 1)
+ polygon[:, 1] = np.clip(polygon[:, 1], 0, h - 1)
+
+ for i in range(len(polygons)):
+ area = self.polygon_area(polygons[i])
+ if abs(area) < 1:
+ ignore_tags[i] = True
+ if area > 0:
+ polygons[i] = polygons[i][::-1, :]
+ return polygons, ignore_tags
+
+ def polygon_area(self, polygon):
+ # return cv2.contourArea(polygon.astype(np.float32))
+ edge = 0
+ for i in range(polygon.shape[0]):
+ next_index = (i + 1) % polygon.shape[0]
+            edge += (polygon[next_index, 0] - polygon[i, 0]) * (
+                polygon[next_index, 1] + polygon[i, 1])
+
+ return edge / 2.
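
The companion sketch for MakeShrinkMap (same toy sample): the polygon is offset inward by the same D = area * (1 - shrink_ratio^2) / perimeter, shrink_map marks the shrunk interior with 1, and shrink_mask zeroes out ignored or too-small instances:

```python
import numpy as np
from ppocr.data.imaug.make_shrink_map import MakeShrinkMap

data = {
    'image': np.zeros((120, 160, 3), dtype=np.uint8),
    'polys': np.array([[[20, 20], [120, 20], [120, 60], [20, 60]]],
                      dtype=np.float32),
    'ignore_tags': [False],
}
data = MakeShrinkMap(min_text_size=8, shrink_ratio=0.4)(data)
print(data['shrink_map'].sum() > 0, data['shrink_mask'].mean())  # True 1.0
```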
diff --git a/ppocr/data/imaug/operators.py b/ppocr/data/imaug/operators.py
new file mode 100644
index 0000000000000000000000000000000000000000..36b1335fae12c3606b5d855564378e86a6712b61
--- /dev/null
+++ b/ppocr/data/imaug/operators.py
@@ -0,0 +1,185 @@
+"""
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import sys
+import six
+import cv2
+import numpy as np
+
+
+class DecodeImage(object):
+ """ decode image """
+
+ def __init__(self, img_mode='RGB', channel_first=False, **kwargs):
+ self.img_mode = img_mode
+ self.channel_first = channel_first
+
+ def __call__(self, data):
+ img = data['image']
+ if six.PY2:
+ assert type(img) is str and len(
+ img) > 0, "invalid input 'img' in DecodeImage"
+ else:
+ assert type(img) is bytes and len(
+ img) > 0, "invalid input 'img' in DecodeImage"
+ img = np.frombuffer(img, dtype='uint8')
+ img = cv2.imdecode(img, 1)
+ if self.img_mode == 'GRAY':
+ img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+ elif self.img_mode == 'RGB':
+ assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape)
+ img = img[:, :, ::-1]
+
+ if self.channel_first:
+ img = img.transpose((2, 0, 1))
+
+ data['image'] = img
+ return data
+
+
+class NormalizeImage(object):
+ """ normalize image such as substract mean, divide std
+ """
+
+ def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs):
+ if isinstance(scale, str):
+ scale = eval(scale)
+ self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
+ mean = mean if mean is not None else [0.485, 0.456, 0.406]
+ std = std if std is not None else [0.229, 0.224, 0.225]
+
+ shape = (3, 1, 1) if order == 'chw' else (1, 1, 3)
+ self.mean = np.array(mean).reshape(shape).astype('float32')
+ self.std = np.array(std).reshape(shape).astype('float32')
+
+ def __call__(self, data):
+ img = data['image']
+ from PIL import Image
+ if isinstance(img, Image.Image):
+ img = np.array(img)
+
+ assert isinstance(img,
+ np.ndarray), "invalid input 'img' in NormalizeImage"
+ data['image'] = (
+ img.astype('float32') * self.scale - self.mean) / self.std
+ return data
+
+
+class ToCHWImage(object):
+ """ convert hwc image to chw image
+ """
+
+ def __init__(self, **kwargs):
+ pass
+
+ def __call__(self, data):
+ img = data['image']
+ from PIL import Image
+ if isinstance(img, Image.Image):
+ img = np.array(img)
+ data['image'] = img.transpose((2, 0, 1))
+ return data
+
+
+class KeepKeys(object):
+ def __init__(self, keep_keys, **kwargs):
+ self.keep_keys = keep_keys
+
+ def __call__(self, data):
+ data_list = []
+ for key in self.keep_keys:
+ data_list.append(data[key])
+ return data_list
+
+
+class DetResizeForTest(object):
+ def __init__(self, **kwargs):
+ super(DetResizeForTest, self).__init__()
+ self.resize_type = 0
+ if 'image_shape' in kwargs:
+ self.image_shape = kwargs['image_shape']
+ self.resize_type = 1
+ if 'limit_side_len' in kwargs:
+ self.limit_side_len = kwargs['limit_side_len']
+ self.limit_type = kwargs.get('limit_type', 'min')
+ else:
+ self.limit_side_len = 736
+ self.limit_type = 'min'
+
+ def __call__(self, data):
+ img = data['image']
+
+ if self.resize_type == 0:
+ img, shape = self.resize_image_type0(img)
+ else:
+ img, shape = self.resize_image_type1(img)
+ data['image'] = img
+ data['shape'] = shape
+ return data
+
+ def resize_image_type1(self, img):
+ resize_h, resize_w = self.image_shape
+ ori_h, ori_w = img.shape[:2] # (h, w, c)
+ img = cv2.resize(img, (int(resize_w), int(resize_h)))
+ return img, np.array([ori_h, ori_w])
+
+ def resize_image_type0(self, img):
+ """
+ resize image to a size multiple of 32 which is required by the network
+ args:
+ img(array): array with shape [h, w, c]
+ return(tuple):
+ img, (ratio_h, ratio_w)
+ """
+ limit_side_len = self.limit_side_len
+ h, w, _ = img.shape
+
+ # limit the max side
+ if self.limit_type == 'max':
+ if max(h, w) > limit_side_len:
+ if h > w:
+ ratio = float(limit_side_len) / h
+ else:
+ ratio = float(limit_side_len) / w
+ else:
+ ratio = 1.
+ else:
+ if min(h, w) < limit_side_len:
+ if h < w:
+ ratio = float(limit_side_len) / h
+ else:
+ ratio = float(limit_side_len) / w
+ else:
+ ratio = 1.
+ resize_h = int(h * ratio)
+ resize_w = int(w * ratio)
+
+ resize_h = int(round(resize_h / 32) * 32)
+ resize_w = int(round(resize_w / 32) * 32)
+
+ try:
+ if int(resize_w) <= 0 or int(resize_h) <= 0:
+ return None, (None, None)
+ img = cv2.resize(img, (int(resize_w), int(resize_h)))
+        except cv2.error:
+            print(img.shape, resize_w, resize_h)
+            sys.exit(1)
+ return img, np.array([h, w])
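
A minimal sketch chaining the basic operators (the file path is a placeholder): decode raw bytes, apply ImageNet normalization, then move channels first:

```python
from ppocr.data.imaug.operators import DecodeImage, NormalizeImage, ToCHWImage

ops = [
    DecodeImage(img_mode='RGB', channel_first=False),
    NormalizeImage(scale='1./255.', mean=[0.485, 0.456, 0.406],
                   std=[0.229, 0.224, 0.225], order='hwc'),
    ToCHWImage(),
]
data = {'image': open('demo.jpg', 'rb').read()}  # placeholder path
for op in ops:
    data = op(data)
print(data['image'].shape)  # (3, H, W), float32
```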
diff --git a/ppocr/data/imaug/random_crop_data.py b/ppocr/data/imaug/random_crop_data.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d67cff61d6f340be6d80d8243c68909a94c4e88
--- /dev/null
+++ b/ppocr/data/imaug/random_crop_data.py
@@ -0,0 +1,210 @@
+# -*- coding:utf-8 -*-
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import numpy as np
+import cv2
+import random
+
+
+def is_poly_in_rect(poly, x, y, w, h):
+ poly = np.array(poly)
+ if poly[:, 0].min() < x or poly[:, 0].max() > x + w:
+ return False
+ if poly[:, 1].min() < y or poly[:, 1].max() > y + h:
+ return False
+ return True
+
+
+def is_poly_outside_rect(poly, x, y, w, h):
+ poly = np.array(poly)
+ if poly[:, 0].max() < x or poly[:, 0].min() > x + w:
+ return True
+ if poly[:, 1].max() < y or poly[:, 1].min() > y + h:
+ return True
+ return False
+
+
+def split_regions(axis):
+ regions = []
+ min_axis = 0
+ for i in range(1, axis.shape[0]):
+ if axis[i] != axis[i - 1] + 1:
+ region = axis[min_axis:i]
+ min_axis = i
+ regions.append(region)
+ return regions
+
+
+def random_select(axis, max_size):
+ xx = np.random.choice(axis, size=2)
+ xmin = np.min(xx)
+ xmax = np.max(xx)
+ xmin = np.clip(xmin, 0, max_size - 1)
+ xmax = np.clip(xmax, 0, max_size - 1)
+ return xmin, xmax
+
+
+def region_wise_random_select(regions, max_size):
+ selected_index = list(np.random.choice(len(regions), 2))
+ selected_values = []
+ for index in selected_index:
+ axis = regions[index]
+ xx = int(np.random.choice(axis, size=1))
+ selected_values.append(xx)
+ xmin = min(selected_values)
+ xmax = max(selected_values)
+ return xmin, xmax
+
+
+def crop_area(im, text_polys, min_crop_side_ratio, max_tries):
+ h, w, _ = im.shape
+ h_array = np.zeros(h, dtype=np.int32)
+ w_array = np.zeros(w, dtype=np.int32)
+ for points in text_polys:
+ points = np.round(points, decimals=0).astype(np.int32)
+ minx = np.min(points[:, 0])
+ maxx = np.max(points[:, 0])
+ w_array[minx:maxx] = 1
+ miny = np.min(points[:, 1])
+ maxy = np.max(points[:, 1])
+ h_array[miny:maxy] = 1
+ # ensure the cropped area not across a text
+ h_axis = np.where(h_array == 0)[0]
+ w_axis = np.where(w_array == 0)[0]
+
+ if len(h_axis) == 0 or len(w_axis) == 0:
+ return 0, 0, w, h
+
+ h_regions = split_regions(h_axis)
+ w_regions = split_regions(w_axis)
+
+ for i in range(max_tries):
+ if len(w_regions) > 1:
+ xmin, xmax = region_wise_random_select(w_regions, w)
+ else:
+ xmin, xmax = random_select(w_axis, w)
+ if len(h_regions) > 1:
+ ymin, ymax = region_wise_random_select(h_regions, h)
+ else:
+ ymin, ymax = random_select(h_axis, h)
+
+ if xmax - xmin < min_crop_side_ratio * w or ymax - ymin < min_crop_side_ratio * h:
+ # area too small
+ continue
+ num_poly_in_rect = 0
+ for poly in text_polys:
+ if not is_poly_outside_rect(poly, xmin, ymin, xmax - xmin,
+ ymax - ymin):
+ num_poly_in_rect += 1
+ break
+
+ if num_poly_in_rect > 0:
+ return xmin, ymin, xmax - xmin, ymax - ymin
+
+ return 0, 0, w, h
+
+
+class EastRandomCropData(object):
+ def __init__(self,
+ size=(640, 640),
+ max_tries=10,
+ min_crop_side_ratio=0.1,
+ keep_ratio=True,
+ **kwargs):
+ self.size = size
+ self.max_tries = max_tries
+ self.min_crop_side_ratio = min_crop_side_ratio
+ self.keep_ratio = keep_ratio
+
+ def __call__(self, data):
+ img = data['image']
+ text_polys = data['polys']
+ ignore_tags = data['ignore_tags']
+ texts = data['texts']
+ all_care_polys = [
+ text_polys[i] for i, tag in enumerate(ignore_tags) if not tag
+ ]
+        # compute the crop region
+ crop_x, crop_y, crop_w, crop_h = crop_area(
+ img, all_care_polys, self.min_crop_side_ratio, self.max_tries)
+        # crop the image, keeping the aspect ratio and padding
+ scale_w = self.size[0] / crop_w
+ scale_h = self.size[1] / crop_h
+ scale = min(scale_w, scale_h)
+ h = int(crop_h * scale)
+ w = int(crop_w * scale)
+ if self.keep_ratio:
+ padimg = np.zeros((self.size[1], self.size[0], img.shape[2]),
+ img.dtype)
+ padimg[:h, :w] = cv2.resize(
+ img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], (w, h))
+ img = padimg
+ else:
+ img = cv2.resize(
+ img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w],
+ tuple(self.size))
+        # crop the text boxes
+ text_polys_crop = []
+ ignore_tags_crop = []
+ texts_crop = []
+ for poly, text, tag in zip(text_polys, texts, ignore_tags):
+ poly = ((poly - (crop_x, crop_y)) * scale).tolist()
+ if not is_poly_outside_rect(poly, 0, 0, w, h):
+ text_polys_crop.append(poly)
+ ignore_tags_crop.append(tag)
+ texts_crop.append(text)
+ data['image'] = img
+ data['polys'] = np.array(text_polys_crop)
+ data['ignore_tags'] = ignore_tags_crop
+ data['texts'] = texts_crop
+ return data
+
+
+class PSERandomCrop(object):
+ def __init__(self, size, **kwargs):
+ self.size = size
+
+ def __call__(self, data):
+ imgs = data['imgs']
+
+ h, w = imgs[0].shape[0:2]
+ th, tw = self.size
+ if w == tw and h == th:
+ return imgs
+
+        # if text instances exist in the label, crop around them with some
+        # probability, using threshold_label_map to locate them
+ if np.max(imgs[2]) > 0 and random.random() > 3 / 8:
+            # top-left corner of the text instances
+ tl = np.min(np.where(imgs[2] > 0), axis=1) - self.size
+ tl[tl < 0] = 0
+            # bottom-right corner of the text instances
+ br = np.max(np.where(imgs[2] > 0), axis=1) - self.size
+ br[br < 0] = 0
+            # ensure there is enough room to crop when the bottom-right point is selected
+ br[0] = min(br[0], h - th)
+ br[1] = min(br[1], w - tw)
+
+ for _ in range(50000):
+ i = random.randint(tl[0], br[0])
+ j = random.randint(tl[1], br[1])
+                # ensure the shrink_label_map contains text
+ if imgs[1][i:i + th, j:j + tw].sum() <= 0:
+ continue
+ else:
+ break
+ else:
+ i = random.randint(0, h - th)
+ j = random.randint(0, w - tw)
+
+ # return i, j, th, tw
+ for idx in range(len(imgs)):
+ if len(imgs[idx].shape) == 3:
+ imgs[idx] = imgs[idx][i:i + th, j:j + tw, :]
+ else:
+ imgs[idx] = imgs[idx][i:i + th, j:j + tw]
+ data['imgs'] = imgs
+ return data
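
Usage sketch for EastRandomCropData on toy data: the crop window is sampled so it never slices through a kept text polygon, and with keep_ratio=True the crop is resized and padded onto the target canvas:

```python
import numpy as np
from ppocr.data.imaug.random_crop_data import EastRandomCropData

data = {
    'image': np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8),
    'polys': np.array([[[100, 100], [200, 100], [200, 140], [100, 140]]],
                      dtype=np.float32),
    'texts': ['hello'],
    'ignore_tags': [False],
}
cropper = EastRandomCropData(size=(640, 640), max_tries=10,
                             min_crop_side_ratio=0.1, keep_ratio=True)
data = cropper(data)
print(data['image'].shape)  # (640, 640, 3); only polys inside the crop remain
```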
diff --git a/ppocr/data/rec/img_tools.py b/ppocr/data/imaug/rec_img_aug.py
old mode 100755
new mode 100644
similarity index 52%
rename from ppocr/data/rec/img_tools.py
rename to ppocr/data/imaug/rec_img_aug.py
index 8b497e6b803ba0fffaefc3e12c366130504b9ce0..e3792553ca088c36a1fbd52097b666f7594c220f
--- a/ppocr/data/rec/img_tools.py
+++ b/ppocr/data/imaug/rec_img_aug.py
@@ -1,31 +1,70 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
import math
import cv2
import numpy as np
import random
-from ppocr.utils.utility import initial_logger
-logger = initial_logger()
+from .text_image_aug import tia_perspective, tia_stretch, tia_distort
-def get_bounding_box_rect(pos):
- left = min(pos[0])
- right = max(pos[0])
- top = min(pos[1])
- bottom = max(pos[1])
- return [left, top, right, bottom]
+
+class RecAug(object):
+    def __init__(self, **kwargs):
+ pass
+
+ def __call__(self, data):
+ img = data['image']
+ img = warp(img, 10)
+ data['image'] = img
+ return data
+
+
+class RecResizeImg(object):
+ def __init__(self,
+ image_shape,
+ infer_mode=False,
+ character_type='ch',
+ use_tps=False,
+ **kwargs):
+ self.image_shape = image_shape
+ self.infer_mode = infer_mode
+ self.character_type = character_type
+ self.use_tps = use_tps
+
+ def __call__(self, data):
+ img = data['image']
+ if self.infer_mode and self.character_type == "ch" and not self.use_tps:
+ norm_img = resize_norm_img_chinese(img, self.image_shape)
+ else:
+ norm_img = resize_norm_img(img, self.image_shape)
+ data['image'] = norm_img
+ return data
def resize_norm_img(img, image_shape):
@@ -77,19 +116,6 @@ def resize_norm_img_chinese(img, image_shape):
return padding_im
-def get_img_data(value):
- """get_img_data"""
- if not value:
- return None
- imgdata = np.frombuffer(value, dtype='uint8')
- if imgdata is None:
- return None
- imgori = cv2.imdecode(imgdata, 1)
- if imgori is None:
- return None
- return imgori
-
-
def flag():
"""
flag
@@ -196,6 +222,9 @@ class Config:
self.h = h
self.perspective = True
+ self.stretch = True
+ self.distort = True
+
self.crop = True
self.affine = False
self.reverse = True
@@ -299,168 +328,39 @@ def warp(img, ang):
config.make(w, h, ang)
new_img = img
+ prob = 0.4
+
+ if config.distort:
+ img_height, img_width = img.shape[0:2]
+ if random.random() <= prob and img_height >= 20 and img_width >= 20:
+ new_img = tia_distort(new_img, random.randint(3, 6))
+
+ if config.stretch:
+ img_height, img_width = img.shape[0:2]
+ if random.random() <= prob and img_height >= 20 and img_width >= 20:
+ new_img = tia_stretch(new_img, random.randint(3, 6))
+
if config.perspective:
- tp = random.randint(1, 100)
- if tp >= 50:
- warpR, (r1, c1), ratio, dst = get_warpR(config)
- new_w = int(np.max(dst[:, 0])) - int(np.min(dst[:, 0]))
- new_img = cv2.warpPerspective(
- new_img,
- warpR, (int(new_w * ratio), h),
- borderMode=config.borderMode)
+ if random.random() <= prob:
+ new_img = tia_perspective(new_img)
+
if config.crop:
img_height, img_width = img.shape[0:2]
- tp = random.randint(1, 100)
- if tp >= 50 and img_height >= 20 and img_width >= 20:
+ if random.random() <= prob and img_height >= 20 and img_width >= 20:
new_img = get_crop(new_img)
- if config.affine:
- warpT = get_warpAffine(config)
- new_img = cv2.warpAffine(
- new_img, warpT, (w, h), borderMode=config.borderMode)
+
if config.blur:
- tp = random.randint(1, 100)
- if tp >= 50:
+ if random.random() <= prob:
new_img = blur(new_img)
if config.color:
- tp = random.randint(1, 100)
- if tp >= 50:
+ if random.random() <= prob:
new_img = cvtColor(new_img)
if config.jitter:
new_img = jitter(new_img)
if config.noise:
- tp = random.randint(1, 100)
- if tp >= 50:
+ if random.random() <= prob:
new_img = add_gasuss_noise(new_img)
if config.reverse:
- tp = random.randint(1, 100)
- if tp >= 50:
+ if random.random() <= prob:
new_img = 255 - new_img
return new_img
-
-
-def process_image(img,
- image_shape,
- label=None,
- char_ops=None,
- loss_type=None,
- max_text_length=None,
- tps=None,
- infer_mode=False,
- distort=False):
- if distort:
- img = warp(img, 10)
- if infer_mode and char_ops.character_type == "ch" and not tps:
- norm_img = resize_norm_img_chinese(img, image_shape)
- else:
- norm_img = resize_norm_img(img, image_shape)
-
- norm_img = norm_img[np.newaxis, :]
- if label is not None:
- # char_num = char_ops.get_char_num()
- text = char_ops.encode(label)
- if len(text) == 0 or len(text) > max_text_length:
- logger.info(
- "Warning in ppocr/data/rec/img_tools.py: Wrong data type."
- "Excepted string with length between 1 and {}, but "
- "got '{}'. Label is '{}'".format(max_text_length,
- len(text), label))
- return None
- else:
- if loss_type == "ctc":
- text = text.reshape(-1, 1)
- return (norm_img, text)
- elif loss_type == "attention":
- beg_flag_idx = char_ops.get_beg_end_flag_idx("beg")
- end_flag_idx = char_ops.get_beg_end_flag_idx("end")
- beg_text = np.append(beg_flag_idx, text)
- end_text = np.append(text, end_flag_idx)
- beg_text = beg_text.reshape(-1, 1)
- end_text = end_text.reshape(-1, 1)
- return (norm_img, beg_text, end_text)
- else:
- assert False, "Unsupport loss_type %s in process_image"\
- % loss_type
- return (norm_img)
-
-def resize_norm_img_srn(img, image_shape):
- imgC, imgH, imgW = image_shape
-
- img_black = np.zeros((imgH, imgW))
- im_hei = img.shape[0]
- im_wid = img.shape[1]
-
- if im_wid <= im_hei * 1:
- img_new = cv2.resize(img, (imgH * 1, imgH))
- elif im_wid <= im_hei * 2:
- img_new = cv2.resize(img, (imgH * 2, imgH))
- elif im_wid <= im_hei * 3:
- img_new = cv2.resize(img, (imgH * 3, imgH))
- else:
- img_new = cv2.resize(img, (imgW, imgH))
-
- img_np = np.asarray(img_new)
- img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY)
- img_black[:, 0:img_np.shape[1]] = img_np
- img_black = img_black[:, :, np.newaxis]
-
- row, col, c = img_black.shape
- c = 1
-
- return np.reshape(img_black, (c, row, col)).astype(np.float32)
-
-def srn_other_inputs(image_shape,
- num_heads,
- max_text_length,
- char_num):
-
- imgC, imgH, imgW = image_shape
- feature_dim = int((imgH / 8) * (imgW / 8))
-
- encoder_word_pos = np.array(range(0, feature_dim)).reshape((feature_dim, 1)).astype('int64')
- gsrm_word_pos = np.array(range(0, max_text_length)).reshape((max_text_length, 1)).astype('int64')
-
- lbl_weight = np.array([int(char_num-1)] * max_text_length).reshape((-1,1)).astype('int64')
-
- gsrm_attn_bias_data = np.ones((1, max_text_length, max_text_length))
- gsrm_slf_attn_bias1 = np.triu(gsrm_attn_bias_data, 1).reshape([-1, 1, max_text_length, max_text_length])
- gsrm_slf_attn_bias1 = np.tile(gsrm_slf_attn_bias1, [1, num_heads, 1, 1]) * [-1e9]
-
- gsrm_slf_attn_bias2 = np.tril(gsrm_attn_bias_data, -1).reshape([-1, 1, max_text_length, max_text_length])
- gsrm_slf_attn_bias2 = np.tile(gsrm_slf_attn_bias2, [1, num_heads, 1, 1]) * [-1e9]
-
- encoder_word_pos = encoder_word_pos[np.newaxis, :]
- gsrm_word_pos = gsrm_word_pos[np.newaxis, :]
-
- return [lbl_weight, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, gsrm_slf_attn_bias2]
-
-def process_image_srn(img,
- image_shape,
- num_heads,
- max_text_length,
- label=None,
- char_ops=None,
- loss_type=None):
- norm_img = resize_norm_img_srn(img, image_shape)
- norm_img = norm_img[np.newaxis, :]
- char_num = char_ops.get_char_num()
-
- [lbl_weight, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, gsrm_slf_attn_bias2] = \
- srn_other_inputs(image_shape, num_heads, max_text_length,char_num)
-
- if label is not None:
- text = char_ops.encode(label)
- if len(text) == 0 or len(text) > max_text_length:
- return None
- else:
- if loss_type == "srn":
- text_padded = [int(char_num-1)] * max_text_length
- for i in range(len(text)):
- text_padded[i] = text[i]
- lbl_weight[i] = [1.0]
- text_padded = np.array(text_padded)
- text = text_padded.reshape(-1, 1)
- return (norm_img, text,encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, gsrm_slf_attn_bias2,lbl_weight)
- else:
- assert False, "Unsupport loss_type %s in process_image"\
- % loss_type
- return (norm_img, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, gsrm_slf_attn_bias2)
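
A sketch of the refactored recognition path (assuming the unchanged resize_norm_img helper shown as context above): RecAug now wraps warp() directly, and RecResizeImg replaces the old process_image resize logic:

```python
import numpy as np
from ppocr.data.imaug.rec_img_aug import RecAug, RecResizeImg

data = {'image': np.random.randint(0, 255, (32, 100, 3), dtype=np.uint8)}
data = RecAug()(data)  # TIA distort/stretch/perspective, blur, color, etc.
data = RecResizeImg(image_shape=[3, 32, 320])(data)
print(data['image'].shape)  # (3, 32, 320)
```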
diff --git a/ppocr/data/rec/__init__.py b/ppocr/data/imaug/text_image_aug/__init__.py
old mode 100755
new mode 100644
similarity index 67%
rename from ppocr/data/rec/__init__.py
rename to ppocr/data/imaug/text_image_aug/__init__.py
index abf198b97e6e818e1fbe59006f98492640bcee54..bca262638efb00190aaf2b6328cd34a9e87ba131
--- a/ppocr/data/rec/__init__.py
+++ b/ppocr/data/imaug/text_image_aug/__init__.py
@@ -1,13 +1,17 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+
+from .augment import tia_perspective, tia_distort, tia_stretch
+
+__all__ = ['tia_distort', 'tia_stretch', 'tia_perspective']
diff --git a/ppocr/data/imaug/text_image_aug/augment.py b/ppocr/data/imaug/text_image_aug/augment.py
new file mode 100644
index 0000000000000000000000000000000000000000..1aeff3733a4521c56dd5972fc058f6e0c245e4b7
--- /dev/null
+++ b/ppocr/data/imaug/text_image_aug/augment.py
@@ -0,0 +1,116 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+from .warp_mls import WarpMLS
+
+
+def tia_distort(src, segment=4):
+ img_h, img_w = src.shape[:2]
+
+ cut = img_w // segment
+ thresh = cut // 3
+
+ src_pts = list()
+ dst_pts = list()
+
+ src_pts.append([0, 0])
+ src_pts.append([img_w, 0])
+ src_pts.append([img_w, img_h])
+ src_pts.append([0, img_h])
+
+ dst_pts.append([np.random.randint(thresh), np.random.randint(thresh)])
+ dst_pts.append(
+ [img_w - np.random.randint(thresh), np.random.randint(thresh)])
+ dst_pts.append(
+ [img_w - np.random.randint(thresh), img_h - np.random.randint(thresh)])
+ dst_pts.append(
+ [np.random.randint(thresh), img_h - np.random.randint(thresh)])
+
+ half_thresh = thresh * 0.5
+
+ for cut_idx in np.arange(1, segment, 1):
+ src_pts.append([cut * cut_idx, 0])
+ src_pts.append([cut * cut_idx, img_h])
+ dst_pts.append([
+ cut * cut_idx + np.random.randint(thresh) - half_thresh,
+ np.random.randint(thresh) - half_thresh
+ ])
+ dst_pts.append([
+ cut * cut_idx + np.random.randint(thresh) - half_thresh,
+ img_h + np.random.randint(thresh) - half_thresh
+ ])
+
+ trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h)
+ dst = trans.generate()
+
+ return dst
+
+
+def tia_stretch(src, segment=4):
+ img_h, img_w = src.shape[:2]
+
+ cut = img_w // segment
+ thresh = cut * 4 // 5
+
+ src_pts = list()
+ dst_pts = list()
+
+ src_pts.append([0, 0])
+ src_pts.append([img_w, 0])
+ src_pts.append([img_w, img_h])
+ src_pts.append([0, img_h])
+
+ dst_pts.append([0, 0])
+ dst_pts.append([img_w, 0])
+ dst_pts.append([img_w, img_h])
+ dst_pts.append([0, img_h])
+
+ half_thresh = thresh * 0.5
+
+ for cut_idx in np.arange(1, segment, 1):
+ move = np.random.randint(thresh) - half_thresh
+ src_pts.append([cut * cut_idx, 0])
+ src_pts.append([cut * cut_idx, img_h])
+ dst_pts.append([cut * cut_idx + move, 0])
+ dst_pts.append([cut * cut_idx + move, img_h])
+
+ trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h)
+ dst = trans.generate()
+
+ return dst
+
+
+def tia_perspective(src):
+ img_h, img_w = src.shape[:2]
+
+ thresh = img_h // 2
+
+ src_pts = list()
+ dst_pts = list()
+
+ src_pts.append([0, 0])
+ src_pts.append([img_w, 0])
+ src_pts.append([img_w, img_h])
+ src_pts.append([0, img_h])
+
+ dst_pts.append([0, np.random.randint(thresh)])
+ dst_pts.append([img_w, np.random.randint(thresh)])
+ dst_pts.append([img_w, img_h - np.random.randint(thresh)])
+ dst_pts.append([0, img_h - np.random.randint(thresh)])
+
+ trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h)
+ dst = trans.generate()
+
+ return dst
\ No newline at end of file
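
An illustrative run of the three TIA ops on a dummy crop (warp() gates distort/stretch behind a >= 20 px size check; mirror that if you call them directly):

```python
import numpy as np
from ppocr.data.imaug.text_image_aug import (tia_distort, tia_perspective,
                                             tia_stretch)

img = np.random.randint(0, 255, (32, 128, 3), dtype=np.uint8)
out1 = tia_distort(img, segment=4)   # jitter corner and cut points
out2 = tia_stretch(img, segment=4)   # slide interior cut points horizontally
out3 = tia_perspective(img)          # shift top/bottom edges vertically
print(out1.shape, out2.shape, out3.shape)
```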
diff --git a/ppocr/data/imaug/text_image_aug/warp_mls.py b/ppocr/data/imaug/text_image_aug/warp_mls.py
new file mode 100644
index 0000000000000000000000000000000000000000..d6cbe749b61aa4cf3163927c096868c83f4a4cdd
--- /dev/null
+++ b/ppocr/data/imaug/text_image_aug/warp_mls.py
@@ -0,0 +1,164 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+
+
+class WarpMLS:
+ def __init__(self, src, src_pts, dst_pts, dst_w, dst_h, trans_ratio=1.):
+ self.src = src
+ self.src_pts = src_pts
+ self.dst_pts = dst_pts
+ self.pt_count = len(self.dst_pts)
+ self.dst_w = dst_w
+ self.dst_h = dst_h
+ self.trans_ratio = trans_ratio
+ self.grid_size = 100
+ self.rdx = np.zeros((self.dst_h, self.dst_w))
+ self.rdy = np.zeros((self.dst_h, self.dst_w))
+
+ @staticmethod
+ def __bilinear_interp(x, y, v11, v12, v21, v22):
+        return (v11 * (1 - y) + v12 * y) * (1 - x) + \
+               (v21 * (1 - y) + v22 * y) * x
+
+ def generate(self):
+ self.calc_delta()
+ return self.gen_img()
+
+ def calc_delta(self):
+ w = np.zeros(self.pt_count, dtype=np.float32)
+
+ if self.pt_count < 2:
+ return
+
+ i = 0
+ while 1:
+ if self.dst_w <= i < self.dst_w + self.grid_size - 1:
+ i = self.dst_w - 1
+ elif i >= self.dst_w:
+ break
+
+ j = 0
+ while 1:
+ if self.dst_h <= j < self.dst_h + self.grid_size - 1:
+ j = self.dst_h - 1
+ elif j >= self.dst_h:
+ break
+
+ sw = 0
+ swp = np.zeros(2, dtype=np.float32)
+ swq = np.zeros(2, dtype=np.float32)
+ new_pt = np.zeros(2, dtype=np.float32)
+ cur_pt = np.array([i, j], dtype=np.float32)
+
+ k = 0
+ for k in range(self.pt_count):
+ if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]:
+ break
+
+ w[k] = 1. / (
+ (i - self.dst_pts[k][0]) * (i - self.dst_pts[k][0]) +
+ (j - self.dst_pts[k][1]) * (j - self.dst_pts[k][1]))
+
+ sw += w[k]
+ swp = swp + w[k] * np.array(self.dst_pts[k])
+ swq = swq + w[k] * np.array(self.src_pts[k])
+
+ if k == self.pt_count - 1:
+ pstar = 1 / sw * swp
+ qstar = 1 / sw * swq
+
+ miu_s = 0
+ for k in range(self.pt_count):
+ if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]:
+ continue
+ pt_i = self.dst_pts[k] - pstar
+ miu_s += w[k] * np.sum(pt_i * pt_i)
+
+ cur_pt -= pstar
+ cur_pt_j = np.array([-cur_pt[1], cur_pt[0]])
+
+ for k in range(self.pt_count):
+ if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]:
+ continue
+
+ pt_i = self.dst_pts[k] - pstar
+ pt_j = np.array([-pt_i[1], pt_i[0]])
+
+ tmp_pt = np.zeros(2, dtype=np.float32)
+ tmp_pt[0] = np.sum(pt_i * cur_pt) * self.src_pts[k][0] - \
+ np.sum(pt_j * cur_pt) * self.src_pts[k][1]
+ tmp_pt[1] = -np.sum(pt_i * cur_pt_j) * self.src_pts[k][0] + \
+ np.sum(pt_j * cur_pt_j) * self.src_pts[k][1]
+ tmp_pt *= (w[k] / miu_s)
+ new_pt += tmp_pt
+
+ new_pt += qstar
+ else:
+ new_pt = self.src_pts[k]
+
+ self.rdx[j, i] = new_pt[0] - i
+ self.rdy[j, i] = new_pt[1] - j
+
+ j += self.grid_size
+ i += self.grid_size
+
+ def gen_img(self):
+ src_h, src_w = self.src.shape[:2]
+ dst = np.zeros_like(self.src, dtype=np.float32)
+
+ for i in np.arange(0, self.dst_h, self.grid_size):
+ for j in np.arange(0, self.dst_w, self.grid_size):
+ ni = i + self.grid_size
+ nj = j + self.grid_size
+ w = h = self.grid_size
+ if ni >= self.dst_h:
+ ni = self.dst_h - 1
+ h = ni - i + 1
+ if nj >= self.dst_w:
+ nj = self.dst_w - 1
+ w = nj - j + 1
+
+ di = np.reshape(np.arange(h), (-1, 1))
+ dj = np.reshape(np.arange(w), (1, -1))
+ delta_x = self.__bilinear_interp(
+ di / h, dj / w, self.rdx[i, j], self.rdx[i, nj],
+ self.rdx[ni, j], self.rdx[ni, nj])
+ delta_y = self.__bilinear_interp(
+ di / h, dj / w, self.rdy[i, j], self.rdy[i, nj],
+ self.rdy[ni, j], self.rdy[ni, nj])
+ nx = j + dj + delta_x * self.trans_ratio
+ ny = i + di + delta_y * self.trans_ratio
+ nx = np.clip(nx, 0, src_w - 1)
+ ny = np.clip(ny, 0, src_h - 1)
+ nxi = np.array(np.floor(nx), dtype=np.int32)
+ nyi = np.array(np.floor(ny), dtype=np.int32)
+ nxi1 = np.array(np.ceil(nx), dtype=np.int32)
+ nyi1 = np.array(np.ceil(ny), dtype=np.int32)
+
+ if len(self.src.shape) == 3:
+ x = np.tile(np.expand_dims(ny - nyi, axis=-1), (1, 1, 3))
+ y = np.tile(np.expand_dims(nx - nxi, axis=-1), (1, 1, 3))
+ else:
+ x = ny - nyi
+ y = nx - nxi
+ dst[i:i + h, j:j + w] = self.__bilinear_interp(
+ x, y, self.src[nyi, nxi], self.src[nyi, nxi1],
+ self.src[nyi1, nxi], self.src[nyi1, nxi1])
+
+ dst = np.clip(dst, 0, 255)
+ dst = np.array(dst, dtype=np.uint8)
+
+ return dst
\ No newline at end of file
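The augmenters above only build control-point lists; `WarpMLS` does the actual moving-least-squares warp. A minimal sketch (not part of the patch) of driving it directly, with fabricated corner correspondences:

```python
# Sketch: warp an image by perturbing its four corners. WarpMLS interpolates the
# displacement field on a grid (grid_size=100) and bilinearly resamples the source.
import numpy as np
from ppocr.data.imaug.text_image_aug.warp_mls import WarpMLS

src = (np.random.rand(32, 100, 3) * 255).astype(np.uint8)  # fabricated input
h, w = src.shape[:2]
src_pts = [[0, 0], [w, 0], [w, h], [0, h]]
dst_pts = [[5, 2], [w - 3, 0], [w, h - 4], [0, h]]         # slightly perturbed corners
dst = WarpMLS(src, src_pts, dst_pts, w, h).generate()
assert dst.shape == src.shape                              # same size, warped content
```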
diff --git a/ppocr/data/reader_main.py b/ppocr/data/reader_main.py
deleted file mode 100755
index b0df0d462b3dd851c8c5ffbe2aff988b8f6b69f6..0000000000000000000000000000000000000000
--- a/ppocr/data/reader_main.py
+++ /dev/null
@@ -1,77 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-import os
-import random
-import numpy as np
-
-import paddle
-from ppocr.utils.utility import create_module
-from copy import deepcopy
-
-from .rec.img_tools import process_image
-import cv2
-
-import sys
-import signal
-
-
-# handle terminate reader process, do not print stack frame
-def _reader_quit(signum, frame):
- print("Reader process exit.")
- sys.exit()
-
-
-def _term_group(sig_num, frame):
- print('pid {} terminated, terminate group '
- '{}...'.format(os.getpid(), os.getpgrp()))
- os.killpg(os.getpgid(os.getpid()), signal.SIGKILL)
-
-
-signal.signal(signal.SIGTERM, _reader_quit)
-signal.signal(signal.SIGINT, _term_group)
-
-
-def reader_main(config=None, mode=None):
- """Create a reader for trainning
-
- Args:
- settings: arguments
-
- Returns:
- train reader
- """
- assert mode in ["train", "eval", "test"],\
- "Nonsupport mode:{}".format(mode)
- global_params = config['Global']
- if mode == "train":
- params = deepcopy(config['TrainReader'])
- elif mode == "eval":
- params = deepcopy(config['EvalReader'])
- else:
- params = deepcopy(config['TestReader'])
- params['mode'] = mode
- params.update(global_params)
- reader_function = params['reader_function']
- function = create_module(reader_function)(params)
- if mode == "train":
- if sys.platform == "win32":
- return function(0)
- readers = []
- num_workers = params['num_workers']
- for process_id in range(num_workers):
- readers.append(function(process_id))
- return paddle.reader.multiprocess_reader(readers, False)
- else:
- return function(mode)
diff --git a/ppocr/data/rec/dataset_traversal.py b/ppocr/data/rec/dataset_traversal.py
deleted file mode 100755
index 84f325b9b880d6289a4d60f7ebff39d962fdb5a1..0000000000000000000000000000000000000000
--- a/ppocr/data/rec/dataset_traversal.py
+++ /dev/null
@@ -1,335 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-import os
-import sys
-import math
-import random
-import numpy as np
-import cv2
-
-import string
-import lmdb
-
-from ppocr.utils.utility import initial_logger
-from ppocr.utils.utility import get_image_file_list
-logger = initial_logger()
-
-from .img_tools import process_image, process_image_srn, get_img_data
-
-
-class LMDBReader(object):
- def __init__(self, params):
- if params['mode'] != 'train':
- self.num_workers = 1
- else:
- self.num_workers = params['num_workers']
- self.lmdb_sets_dir = params['lmdb_sets_dir']
- self.char_ops = params['char_ops']
- self.image_shape = params['image_shape']
- self.loss_type = params['loss_type']
- self.max_text_length = params['max_text_length']
- self.mode = params['mode']
- self.drop_last = False
- self.use_tps = False
- self.num_heads = None
- if "num_heads" in params:
- self.num_heads = params['num_heads']
- if "tps" in params:
- self.ues_tps = True
- self.use_distort = False
- if "distort" in params:
- self.use_distort = params['distort'] and params['use_gpu']
- if not params['use_gpu']:
- logger.info(
- "Distort operation can only support in GPU. Distort will be set to False."
- )
- if params['mode'] == 'train':
- self.batch_size = params['train_batch_size_per_card']
- self.drop_last = True
- else:
- self.batch_size = params['test_batch_size_per_card']
- self.drop_last = False
- self.use_distort = False
- self.infer_img = params['infer_img']
-
- def load_hierarchical_lmdb_dataset(self):
- lmdb_sets = {}
- dataset_idx = 0
- for dirpath, dirnames, filenames in os.walk(self.lmdb_sets_dir + '/'):
- if not dirnames:
- env = lmdb.open(
- dirpath,
- max_readers=32,
- readonly=True,
- lock=False,
- readahead=False,
- meminit=False)
- txn = env.begin(write=False)
- num_samples = int(txn.get('num-samples'.encode()))
- lmdb_sets[dataset_idx] = {"dirpath":dirpath, "env":env, \
- "txn":txn, "num_samples":num_samples}
- dataset_idx += 1
- return lmdb_sets
-
- def print_lmdb_sets_info(self, lmdb_sets):
- lmdb_info_strs = []
- for dataset_idx in range(len(lmdb_sets)):
- tmp_str = " %s:%d," % (lmdb_sets[dataset_idx]['dirpath'],
- lmdb_sets[dataset_idx]['num_samples'])
- lmdb_info_strs.append(tmp_str)
- lmdb_info_strs = ''.join(lmdb_info_strs)
- logger.info("DataSummary:" + lmdb_info_strs)
- return
-
- def close_lmdb_dataset(self, lmdb_sets):
- for dataset_idx in lmdb_sets:
- lmdb_sets[dataset_idx]['env'].close()
- return
-
- def get_lmdb_sample_info(self, txn, index):
- label_key = 'label-%09d'.encode() % index
- label = txn.get(label_key)
- if label is None:
- return None
- label = label.decode('utf-8')
- img_key = 'image-%09d'.encode() % index
- imgbuf = txn.get(img_key)
- img = get_img_data(imgbuf)
- if img is None:
- return None
- return img, label
-
- def __call__(self, process_id):
- if self.mode != 'train':
- process_id = 0
-
- def sample_iter_reader():
- if self.mode != 'train' and self.infer_img is not None:
- image_file_list = get_image_file_list(self.infer_img)
- for single_img in image_file_list:
- img = cv2.imread(single_img)
- if img.shape[-1] == 1 or len(list(img.shape)) == 2:
- img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
- if self.loss_type == 'srn':
- norm_img = process_image_srn(
- img=img,
- image_shape=self.image_shape,
- num_heads=self.num_heads,
- max_text_length=self.max_text_length)
- else:
- norm_img = process_image(
- img=img,
- image_shape=self.image_shape,
- char_ops=self.char_ops,
- tps=self.use_tps,
- infer_mode=True)
- yield norm_img
- else:
- lmdb_sets = self.load_hierarchical_lmdb_dataset()
- if process_id == 0:
- self.print_lmdb_sets_info(lmdb_sets)
- cur_index_sets = [1 + process_id] * len(lmdb_sets)
- while True:
- finish_read_num = 0
- for dataset_idx in range(len(lmdb_sets)):
- cur_index = cur_index_sets[dataset_idx]
- if cur_index > lmdb_sets[dataset_idx]['num_samples']:
- finish_read_num += 1
- else:
- sample_info = self.get_lmdb_sample_info(
- lmdb_sets[dataset_idx]['txn'], cur_index)
- cur_index_sets[dataset_idx] += self.num_workers
- if sample_info is None:
- continue
- img, label = sample_info
- outs = []
- if self.loss_type == "srn":
- outs = process_image_srn(
- img=img,
- image_shape=self.image_shape,
- num_heads=self.num_heads,
- max_text_length=self.max_text_length,
- label=label,
- char_ops=self.char_ops,
- loss_type=self.loss_type)
-
- else:
- outs = process_image(
- img=img,
- image_shape=self.image_shape,
- label=label,
- char_ops=self.char_ops,
- loss_type=self.loss_type,
- max_text_length=self.max_text_length)
- if outs is None:
- continue
- yield outs
-
- if finish_read_num == len(lmdb_sets):
- break
- self.close_lmdb_dataset(lmdb_sets)
-
- def batch_iter_reader():
- batch_outs = []
- for outs in sample_iter_reader():
- batch_outs.append(outs)
- if len(batch_outs) == self.batch_size:
- yield batch_outs
- batch_outs = []
- if not self.drop_last:
- if len(batch_outs) != 0:
- yield batch_outs
-
- if self.infer_img is None:
- return batch_iter_reader
- return sample_iter_reader
-
-
-class SimpleReader(object):
- def __init__(self, params):
- if params['mode'] != 'train':
- self.num_workers = 1
- else:
- self.num_workers = params['num_workers']
- if params['mode'] != 'test':
- self.img_set_dir = params['img_set_dir']
- self.label_file_path = params['label_file_path']
- self.use_gpu = params['use_gpu']
- self.char_ops = params['char_ops']
- self.image_shape = params['image_shape']
- self.loss_type = params['loss_type']
- self.max_text_length = params['max_text_length']
- self.mode = params['mode']
- self.infer_img = params['infer_img']
- self.use_tps = False
- if "num_heads" in params:
- self.num_heads = params['num_heads']
- if "tps" in params:
- self.use_tps = True
- self.use_distort = False
- if "distort" in params:
- self.use_distort = params['distort'] and params['use_gpu']
- if not params['use_gpu']:
- logger.info(
- "Distort operation can only support in GPU.Distort will be set to False."
- )
- if params['mode'] == 'train':
- self.batch_size = params['train_batch_size_per_card']
- self.drop_last = True
- else:
- self.batch_size = params['test_batch_size_per_card']
- self.drop_last = False
- self.use_distort = False
-
- def __call__(self, process_id):
- if self.mode != 'train':
- process_id = 0
-
- def get_device_num():
- if self.use_gpu:
- gpus = os.environ.get("CUDA_VISIBLE_DEVICES", '1')
- gpu_num = len(gpus.split(','))
- return gpu_num
- else:
- cpu_num = os.environ.get("CPU_NUM", 1)
- return int(cpu_num)
-
- def sample_iter_reader():
- if self.mode != 'train' and self.infer_img is not None:
- image_file_list = get_image_file_list(self.infer_img)
- for single_img in image_file_list:
- img = cv2.imread(single_img)
- if img.shape[-1] == 1 or len(list(img.shape)) == 2:
- img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
- if self.loss_type == 'srn':
- norm_img = process_image_srn(
- img=img,
- image_shape=self.image_shape,
- char_ops=self.char_ops,
- num_heads=self.num_heads,
- max_text_length=self.max_text_length)
- else:
- norm_img = process_image(
- img=img,
- image_shape=self.image_shape,
- char_ops=self.char_ops,
- tps=self.use_tps,
- infer_mode=True)
- yield norm_img
- else:
- with open(self.label_file_path, "rb") as fin:
- label_infor_list = fin.readlines()
- img_num = len(label_infor_list)
- img_id_list = list(range(img_num))
- random.shuffle(img_id_list)
- if sys.platform == "win32" and self.num_workers != 1:
- print("multiprocess is not fully compatible with Windows."
- "num_workers will be 1.")
- self.num_workers = 1
- if self.batch_size * get_device_num(
- ) * self.num_workers > img_num:
- raise Exception(
- "The number of the whole data ({}) is smaller than the batch_size * devices_num * num_workers ({})".
- format(img_num, self.batch_size * get_device_num() *
- self.num_workers))
- for img_id in range(process_id, img_num, self.num_workers):
- label_infor = label_infor_list[img_id_list[img_id]]
- substr = label_infor.decode('utf-8').strip("\n").split("\t")
- img_path = self.img_set_dir + "/" + substr[0]
- img = cv2.imread(img_path)
- if img is None:
- logger.info("{} does not exist!".format(img_path))
- continue
- if img.shape[-1] == 1 or len(list(img.shape)) == 2:
- img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
-
- label = substr[1]
- if self.loss_type == "srn":
- outs = process_image_srn(
- img=img,
- image_shape=self.image_shape,
- num_heads=self.num_heads,
- max_text_length=self.max_text_length,
- label=label,
- char_ops=self.char_ops,
- loss_type=self.loss_type)
-
- else:
- outs = process_image(
- img=img,
- image_shape=self.image_shape,
- label=label,
- char_ops=self.char_ops,
- loss_type=self.loss_type,
- max_text_length=self.max_text_length,
- distort=self.use_distort)
- if outs is None:
- continue
- yield outs
-
- def batch_iter_reader():
- batch_outs = []
- for outs in sample_iter_reader():
- batch_outs.append(outs)
- if len(batch_outs) == self.batch_size:
- yield batch_outs
- batch_outs = []
- if not self.drop_last:
- if len(batch_outs) != 0:
- yield batch_outs
-
- if self.infer_img is None:
- return batch_iter_reader
- return sample_iter_reader
diff --git a/ppocr/metrics/DetMetric.py b/ppocr/metrics/DetMetric.py
new file mode 100644
index 0000000000000000000000000000000000000000..889a8e152254365f9c4d417125e2e642577660b5
--- /dev/null
+++ b/ppocr/metrics/DetMetric.py
@@ -0,0 +1,72 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+__all__ = ['DetMetric']
+
+from .eval_det_iou import DetectionIoUEvaluator
+
+
+class DetMetric(object):
+ def __init__(self, main_indicator='hmean', **kwargs):
+ self.evaluator = DetectionIoUEvaluator()
+ self.main_indicator = main_indicator
+ self.reset()
+
+ def __call__(self, preds, batch, **kwargs):
+ '''
+ batch: a list produced by dataloaders.
+ image: np.ndarray of shape (N, C, H, W).
+ ratio_list: np.ndarray of shape (N, 2)
+ polygons: np.ndarray of shape (N, K, 4, 2), the polygons of objective regions.
+ ignore_tags: np.ndarray of shape (N, K), indicates whether a region is ignorable or not.
+ preds: a list of dicts produced by post process
+ points: np.ndarray of shape (N, K, 4, 2), the polygons of objective regions.
+ '''
+ gt_polygons_batch = batch[2]
+ ignore_tags_batch = batch[3]
+ for pred, gt_polygons, ignore_tags in zip(preds, gt_polygons_batch,
+ ignore_tags_batch):
+ # prepare gt
+ gt_info_list = [{
+ 'points': gt_polygon,
+ 'text': '',
+ 'ignore': ignore_tag
+ } for gt_polygon, ignore_tag in zip(gt_polygons, ignore_tags)]
+ # prepare det
+ det_info_list = [{
+ 'points': det_polygon,
+ 'text': ''
+ } for det_polygon in pred['points']]
+ result = self.evaluator.evaluate_image(gt_info_list, det_info_list)
+ self.results.append(result)
+
+ def get_metric(self):
+ """
+ return metircs {
+ 'precision': 0,
+ 'recall': 0,
+ 'hmean': 0
+ }
+ """
+
+ metircs = self.evaluator.combine_results(self.results)
+ self.reset()
+ return metircs
+
+ def reset(self):
+ self.results = [] # clear results
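A sketch (not part of the patch) of one update/read cycle with the new `DetMetric`; the single box is fabricated and the shapes follow the docstring above:

```python
import numpy as np
from ppocr.metrics.DetMetric import DetMetric

box = np.array([[0, 0], [10, 0], [10, 10], [0, 10]], dtype=np.float32)
gt_polygons = np.array([[box]])                 # (N=1, K=1, 4, 2)
ignore_tags = np.array([[False]])               # (N=1, K=1)
preds = [{'points': np.array([box])}]           # a "perfect" prediction
batch = [None, None, gt_polygons, ignore_tags]  # image/ratio_list unused by the metric

metric = DetMetric()
metric(preds, batch)                            # accumulate per-image results
print(metric.get_metric())                      # precision / recall / hmean
```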
diff --git a/ppocr/metrics/RecMetric.py b/ppocr/metrics/RecMetric.py
new file mode 100644
index 0000000000000000000000000000000000000000..98817ad82952bb07a39d594cb6994a5460aff496
--- /dev/null
+++ b/ppocr/metrics/RecMetric.py
@@ -0,0 +1,59 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import Levenshtein
+
+
+class RecMetric(object):
+ def __init__(self, main_indicator='acc', **kwargs):
+ self.main_indicator = main_indicator
+ self.reset()
+
+ def __call__(self, pred_label, *args, **kwargs):
+ preds, labels = pred_label
+ correct_num = 0
+ all_num = 0
+ norm_edit_dis = 0.0
+ for (pred, pred_conf), (target, _) in zip(preds, labels):
+ # max(..., 1) guards against a zero divisor when both strings are empty
+ norm_edit_dis += Levenshtein.distance(pred, target) / max(
+ len(pred), len(target), 1)
+ if pred == target:
+ correct_num += 1
+ all_num += 1
+ # if all_num < 10 and kwargs.get('show_str', False):
+ # print('{} -> {}'.format(pred, target))
+ self.correct_num += correct_num
+ self.all_num += all_num
+ self.norm_edit_dis += norm_edit_dis
+ return {
+ 'acc': correct_num / all_num,
+ 'norm_edit_dis': 1 - norm_edit_dis / all_num
+ }
+
+ def get_metric(self):
+ """
+ return metrics {
+ 'acc': 0,
+ 'norm_edit_dis': 0,
+ }
+ """
+ acc = self.correct_num / self.all_num
+ norm_edit_dis = 1 - self.norm_edit_dis / self.all_num
+ self.reset()
+ return {'acc': acc, 'norm_edit_dis': norm_edit_dis}
+
+ def reset(self):
+ self.correct_num = 0
+ self.all_num = 0
+ self.norm_edit_dis = 0
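A sketch (not part of the patch) of how `RecMetric` is fed: both predictions and labels arrive as (text, confidence) pairs, and `get_metric` drains the accumulated counters.

```python
from ppocr.metrics.RecMetric import RecMetric

metric = RecMetric()
preds = [('hello', 0.95), ('word', 0.80)]
labels = [('hello', 1.0), ('world', 1.0)]
print(metric((preds, labels)))   # per-batch acc and 1 - normalized edit distance
print(metric.get_metric())       # accumulated values; counters reset afterwards
```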
diff --git a/ppocr/metrics/__init__.py b/ppocr/metrics/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3fddac17aaa25d75a95d3d981a8e5bc5e9d0d896
--- /dev/null
+++ b/ppocr/metrics/__init__.py
@@ -0,0 +1,36 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import copy
+
+__all__ = ['build_metric']
+
+
+def build_metric(config):
+ from .DetMetric import DetMetric
+ from .RecMetric import RecMetric
+
+ support_dict = ['DetMetric', 'RecMetric']
+
+ config = copy.deepcopy(config)
+ module_name = config.pop('name')
+ assert module_name in support_dict, Exception(
+ 'metric only supports {}'.format(support_dict))
+ module_class = eval(module_name)(**config)
+ return module_class
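A sketch (not part of the patch): the dict passed to `build_metric` mirrors what a YAML `Metric:` section would deserialize to, with `name` selecting the class and the rest forwarded as kwargs.

```python
from ppocr.metrics import build_metric

metric = build_metric({'name': 'DetMetric', 'main_indicator': 'hmean'})
```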
diff --git a/tools/eval_utils/eval_det_iou.py b/ppocr/metrics/eval_det_iou.py
similarity index 97%
rename from tools/eval_utils/eval_det_iou.py
rename to ppocr/metrics/eval_det_iou.py
index 64405984e3b90aa58f21324e727713075c5c4dc4..a2a3f41833a9ef7615b73b70808fcb3ba2f22aa4 100644
--- a/tools/eval_utils/eval_det_iou.py
+++ b/ppocr/metrics/eval_det_iou.py
@@ -88,8 +88,8 @@ class DetectionIoUEvaluator(object):
points = gt[n]['points']
# transcription = gt[n]['text']
dontCare = gt[n]['ignore']
-# points = Polygon(points)
-# points = points.buffer(0)
+ # points = Polygon(points)
+ # points = points.buffer(0)
if not Polygon(points).is_valid or not Polygon(points).is_simple:
continue
@@ -105,8 +105,8 @@ class DetectionIoUEvaluator(object):
for n in range(len(pred)):
points = pred[n]['points']
-# points = Polygon(points)
-# points = points.buffer(0)
+ # points = Polygon(points)
+ # points = points.buffer(0)
if not Polygon(points).is_valid or not Polygon(points).is_simple:
continue
diff --git a/ppocr/modeling/__init__.py b/ppocr/modeling/__init__.py
index abf198b97e6e818e1fbe59006f98492640bcee54..2d7f1b8e5aa79a180b9493ea8acc4e02e2e50835 100755
--- a/ppocr/modeling/__init__.py
+++ b/ppocr/modeling/__init__.py
@@ -11,3 +11,16 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+
+import copy
+from .losses import build_loss
+
+__all__ = ['build_model', 'build_loss']
+
+
+def build_model(config):
+ from .architectures import Model
+
+ config = copy.deepcopy(config)
+ module_class = Model(config)
+ return module_class
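The modeling package now exposes a single factory. The sketch below (not part of the patch) shows the shape of the config it expects, mirroring `Model.__init__` further down; the `Neck` and `Head` entries are invented placeholders, not verified registry names.

```python
from ppocr.modeling import build_model

config = {
    'type': 'det',
    'algorithm': 'DB',
    'Transform': None,                                # det models use no transform
    'Backbone': {'name': 'MobileNetV3', 'model_name': 'large', 'scale': 0.5},
    'Neck': {'name': 'FPN', 'out_channels': 256},     # placeholder name
    'Head': {'name': 'DBHead', 'k': 50},              # placeholder name
}
model = build_model(config)                           # returns a paddle.nn.Layer
```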
diff --git a/ppocr/modeling/architectures/__init__.py b/ppocr/modeling/architectures/__init__.py
index abf198b97e6e818e1fbe59006f98492640bcee54..e0f823e47d244d336de7e3f94a768c06069e91d0 100755
--- a/ppocr/modeling/architectures/__init__.py
+++ b/ppocr/modeling/architectures/__init__.py
@@ -11,3 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+
+from .model import Model
+__all__ = ['Model']
\ No newline at end of file
diff --git a/ppocr/modeling/architectures/det_model.py b/ppocr/modeling/architectures/det_model.py
deleted file mode 100644
index 54d3a479f40a3f9f6ebb9e6ab739ae7a44796a2e..0000000000000000000000000000000000000000
--- a/ppocr/modeling/architectures/det_model.py
+++ /dev/null
@@ -1,146 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from paddle import fluid
-
-from ppocr.utils.utility import create_module
-from ppocr.utils.utility import initial_logger
-logger = initial_logger()
-from copy import deepcopy
-
-
-class DetModel(object):
- def __init__(self, params):
- """
- Detection module for OCR text detection.
- args:
- params (dict): the super parameters for detection module.
- """
- global_params = params['Global']
- self.algorithm = global_params['algorithm']
-
- backbone_params = deepcopy(params["Backbone"])
- backbone_params.update(global_params)
- self.backbone = create_module(backbone_params['function'])\
- (params=backbone_params)
-
- head_params = deepcopy(params["Head"])
- head_params.update(global_params)
- self.head = create_module(head_params['function'])\
- (params=head_params)
-
- loss_params = deepcopy(params["Loss"])
- loss_params.update(global_params)
- self.loss = create_module(loss_params['function'])\
- (params=loss_params)
-
- self.image_shape = global_params['image_shape']
-
- def create_feed(self, mode):
- """
- create Dataloader feeds
- args:
- mode (str): 'train' for training or else for evaluation
- return: (image, corresponding label, dataloader)
- """
- image_shape = deepcopy(self.image_shape)
- if image_shape[1] % 4 != 0 or image_shape[2] % 4 != 0:
- raise Exception("The size of the image must be divisible by 4, "
- "received image shape is {}, please reset the "
- "Global.image_shape in the yml file".format(
- image_shape))
-
- image = fluid.layers.data(
- name='image', shape=image_shape, dtype='float32')
- if mode == "train":
- if self.algorithm == "EAST":
- h, w = int(image_shape[1] // 4), int(image_shape[2] // 4)
- score = fluid.layers.data(
- name='score', shape=[1, h, w], dtype='float32')
- geo = fluid.layers.data(
- name='geo', shape=[9, h, w], dtype='float32')
- mask = fluid.layers.data(
- name='mask', shape=[1, h, w], dtype='float32')
- feed_list = [image, score, geo, mask]
- labels = {'score': score, 'geo': geo, 'mask': mask}
- elif self.algorithm == "DB":
- shrink_map = fluid.layers.data(
- name='shrink_map', shape=image_shape[1:], dtype='float32')
- shrink_mask = fluid.layers.data(
- name='shrink_mask', shape=image_shape[1:], dtype='float32')
- threshold_map = fluid.layers.data(
- name='threshold_map',
- shape=image_shape[1:],
- dtype='float32')
- threshold_mask = fluid.layers.data(
- name='threshold_mask',
- shape=image_shape[1:],
- dtype='float32')
- feed_list=[image, shrink_map, shrink_mask,\
- threshold_map, threshold_mask]
- labels = {'shrink_map':shrink_map,\
- 'shrink_mask':shrink_mask,\
- 'threshold_map':threshold_map,\
- 'threshold_mask':threshold_mask}
- elif self.algorithm == "SAST":
- input_score = fluid.layers.data(
- name='score', shape=[1, 128, 128], dtype='float32')
- input_border = fluid.layers.data(
- name='border', shape=[5, 128, 128], dtype='float32')
- input_mask = fluid.layers.data(
- name='mask', shape=[1, 128, 128], dtype='float32')
- input_tvo = fluid.layers.data(
- name='tvo', shape=[9, 128, 128], dtype='float32')
- input_tco = fluid.layers.data(
- name='tco', shape=[3, 128, 128], dtype='float32')
- feed_list = [image, input_score, input_border, input_mask, input_tvo, input_tco]
- labels = {'input_score': input_score,\
- 'input_border': input_border,\
- 'input_mask': input_mask,\
- 'input_tvo': input_tvo,\
- 'input_tco': input_tco}
- loader = fluid.io.DataLoader.from_generator(
- feed_list=feed_list,
- capacity=64,
- use_double_buffer=True,
- iterable=False)
- else:
- labels = None
- loader = None
- return image, labels, loader
-
- def __call__(self, mode):
- """
- run forward of defined module
- args:
- mode (str): 'train' for training; 'export' for inference,
- others for evaluation]
- """
- image, labels, loader = self.create_feed(mode)
- conv_feas = self.backbone(image)
- if self.algorithm == "DB":
- predicts = self.head(conv_feas, mode)
- else:
- predicts = self.head(conv_feas)
- if mode == "train":
- losses = self.loss(predicts, labels)
- return loader, losses
- elif mode == "export":
- return [image, predicts]
- else:
- return loader, predicts
diff --git a/ppocr/modeling/architectures/model.py b/ppocr/modeling/architectures/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8cb6f240cb57cbc9e49ce1235916aec2571a847
--- /dev/null
+++ b/ppocr/modeling/architectures/model.py
@@ -0,0 +1,129 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os, sys
+
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.append('/home/zhoujun20/PaddleOCR')
+
+import paddle
+from paddle import nn
+from ppocr.modeling.transform import build_transform
+from ppocr.modeling.backbones import build_backbone
+from ppocr.modeling.necks import build_neck
+from ppocr.modeling.heads import build_head
+
+__all__ = ['Model']
+
+
+class Model(nn.Layer):
+ def __init__(self, config):
+ """
+ Detection module for OCR.
+ args:
+ config (dict): the super parameters for module.
+ """
+ super(Model, self).__init__()
+ algorithm = config['algorithm']
+ self.type = config['type']
+ self.model_name = '{}_{}'.format(self.type, algorithm)
+
+ in_channels = config.get('in_channels', 3)
+ # build transform,
+ # for rec, the transform can be TPS or None
+ # for det and cls, the transform should be None,
+ # but you can add a transform to det and cls if your model needs one
+ if 'Transform' not in config or config['Transform'] is None:
+ self.use_transform = False
+ else:
+ self.use_transform = True
+ config['Transform']['in_channels'] = in_channels
+ self.transform = build_transform(config['Transform'])
+ in_channels = self.transform.out_channels
+
+ # build backbone, the backbone is needed for det, rec and cls
+ config["Backbone"]['in_channels'] = in_channels
+ self.backbone = build_backbone(config["Backbone"], self.type)
+ in_channels = self.backbone.out_channels
+
+ # build neck
+ # for rec, the neck can be cnn, rnn or reshape (None)
+ # for det, the neck can be FPN, BIFPN and so on
+ # for cls, the neck should be None
+ if 'Neck' not in config or config['Neck'] is None:
+ self.use_neck = False
+ else:
+ self.use_neck = True
+ config['Neck']['in_channels'] = in_channels
+ self.neck = build_neck(config['Neck'])
+ in_channels = self.neck.out_channels
+
+ # build head, the head is needed for det, rec and cls
+ config["Head"]['in_channels'] = in_channels
+ self.head = build_head(config["Head"])
+
+ # @paddle.jit.to_static
+ def forward(self, x):
+ if self.use_transform:
+ x = self.transform(x)
+ x = self.backbone(x)
+ if self.use_neck:
+ x = self.neck(x)
+ x = self.head(x)
+ return x
+
+
+def check_static():
+ import numpy as np
+ from ppocr.utils.save_load import load_dygraph_pretrain
+ from ppocr.utils.logging import get_logger
+ from tools import program
+
+ config = program.load_config('configs/det/det_r50_vd_db.yml')
+
+ # import cv2
+ # data = cv2.imread('doc/imgs/1.jpg')
+ # data = normalize(data)
+ logger = get_logger()
+ data = np.zeros((1, 3, 640, 640), dtype=np.float32)
+ paddle.disable_static()
+
+ config['Architecture']['in_channels'] = 3
+ config['Architecture']["Head"]['out_channels'] = 6624
+ model = Model(config['Architecture'])
+ model.eval()
+ load_dygraph_pretrain(
+ model,
+ logger,
+ '/Users/zhoujun20/Desktop/code/PaddleOCR/db/db',
+ load_static_weights=True)
+ x = paddle.to_variable(data)
+ y = model(x)
+ for y1 in y:
+ print(y1.shape)
+ #
+ # # from matplotlib import pyplot as plt
+ # # plt.imshow(y.numpy())
+ # # plt.show()
+ static_out = np.load('/Users/zhoujun20/Desktop/code/PaddleOCR/db/db.npy')
+ diff = y.numpy() - static_out
+ print(y.shape, static_out.shape, diff.mean())
+
+
+if __name__ == '__main__':
+ check_static()
diff --git a/ppocr/modeling/architectures/rec_model.py b/ppocr/modeling/architectures/rec_model.py
deleted file mode 100755
index fe2d4c16dce3882980fe2238ecc16c7c08a89792..0000000000000000000000000000000000000000
--- a/ppocr/modeling/architectures/rec_model.py
+++ /dev/null
@@ -1,228 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from paddle import fluid
-
-from ppocr.utils.utility import create_module
-from ppocr.utils.utility import initial_logger
-logger = initial_logger()
-from copy import deepcopy
-
-
-class RecModel(object):
- def __init__(self, params):
- super(RecModel, self).__init__()
- global_params = params['Global']
- char_num = global_params['char_ops'].get_char_num()
- global_params['char_num'] = char_num
- self.char_type = global_params['character_type']
- self.infer_img = global_params['infer_img']
- if "TPS" in params:
- tps_params = deepcopy(params["TPS"])
- tps_params.update(global_params)
- self.tps = create_module(tps_params['function'])\
- (params=tps_params)
- else:
- self.tps = None
-
- backbone_params = deepcopy(params["Backbone"])
- backbone_params.update(global_params)
- self.backbone = create_module(backbone_params['function'])\
- (params=backbone_params)
-
- head_params = deepcopy(params["Head"])
- head_params.update(global_params)
- self.head = create_module(head_params['function'])\
- (params=head_params)
-
- loss_params = deepcopy(params["Loss"])
- loss_params.update(global_params)
- self.loss = create_module(loss_params['function'])\
- (params=loss_params)
-
- self.loss_type = global_params['loss_type']
- self.image_shape = global_params['image_shape']
- self.max_text_length = global_params['max_text_length']
- if "num_heads" in global_params:
- self.num_heads = global_params["num_heads"]
- else:
- self.num_heads = None
-
- def create_feed(self, mode):
- image_shape = deepcopy(self.image_shape)
- image_shape.insert(0, -1)
- if mode == "train":
- image = fluid.data(name='image', shape=image_shape, dtype='float32')
- if self.loss_type == "attention":
- label_in = fluid.data(
- name='label_in',
- shape=[None, 1],
- dtype='int32',
- lod_level=1)
- label_out = fluid.data(
- name='label_out',
- shape=[None, 1],
- dtype='int32',
- lod_level=1)
- feed_list = [image, label_in, label_out]
- labels = {'label_in': label_in, 'label_out': label_out}
- elif self.loss_type == "srn":
- encoder_word_pos = fluid.data(
- name="encoder_word_pos",
- shape=[
- -1, int((image_shape[-2] / 8) * (image_shape[-1] / 8)),
- 1
- ],
- dtype="int64")
- gsrm_word_pos = fluid.data(
- name="gsrm_word_pos",
- shape=[-1, self.max_text_length, 1],
- dtype="int64")
- gsrm_slf_attn_bias1 = fluid.data(
- name="gsrm_slf_attn_bias1",
- shape=[
- -1, self.num_heads, self.max_text_length,
- self.max_text_length
- ],
- dtype="float32")
- gsrm_slf_attn_bias2 = fluid.data(
- name="gsrm_slf_attn_bias2",
- shape=[
- -1, self.num_heads, self.max_text_length,
- self.max_text_length
- ],
- dtype="float32")
- lbl_weight = fluid.layers.data(
- name="lbl_weight", shape=[-1, 1], dtype='int64')
- label = fluid.data(
- name='label', shape=[-1, 1], dtype='int32', lod_level=1)
- feed_list = [
- image, label, encoder_word_pos, gsrm_word_pos,
- gsrm_slf_attn_bias1, gsrm_slf_attn_bias2, lbl_weight
- ]
- labels = {
- 'label': label,
- 'encoder_word_pos': encoder_word_pos,
- 'gsrm_word_pos': gsrm_word_pos,
- 'gsrm_slf_attn_bias1': gsrm_slf_attn_bias1,
- 'gsrm_slf_attn_bias2': gsrm_slf_attn_bias2,
- 'lbl_weight': lbl_weight
- }
- else:
- label = fluid.data(
- name='label', shape=[None, 1], dtype='int32', lod_level=1)
- feed_list = [image, label]
- labels = {'label': label}
- loader = fluid.io.DataLoader.from_generator(
- feed_list=feed_list,
- capacity=64,
- use_double_buffer=True,
- iterable=False)
- else:
- labels = None
- loader = None
- if self.char_type == "ch" and self.infer_img:
- image_shape[-1] = -1
- if self.tps != None:
- logger.info(
- "WARNRNG!!!\n"
- "TPS does not support variable shape in chinese!"
- "We set img_shape to be the same , it may affect the inference effect"
- )
- image_shape = deepcopy(self.image_shape)
- image = fluid.data(name='image', shape=image_shape, dtype='float32')
- if self.loss_type == "srn":
- encoder_word_pos = fluid.data(
- name="encoder_word_pos",
- shape=[
- -1, int((image_shape[-2] / 8) * (image_shape[-1] / 8)),
- 1
- ],
- dtype="int64")
- gsrm_word_pos = fluid.data(
- name="gsrm_word_pos",
- shape=[-1, self.max_text_length, 1],
- dtype="int64")
- gsrm_slf_attn_bias1 = fluid.data(
- name="gsrm_slf_attn_bias1",
- shape=[
- -1, self.num_heads, self.max_text_length,
- self.max_text_length
- ],
- dtype="float32")
- gsrm_slf_attn_bias2 = fluid.data(
- name="gsrm_slf_attn_bias2",
- shape=[
- -1, self.num_heads, self.max_text_length,
- self.max_text_length
- ],
- dtype="float32")
- feed_list = [
- image, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1,
- gsrm_slf_attn_bias2
- ]
- labels = {
- 'encoder_word_pos': encoder_word_pos,
- 'gsrm_word_pos': gsrm_word_pos,
- 'gsrm_slf_attn_bias1': gsrm_slf_attn_bias1,
- 'gsrm_slf_attn_bias2': gsrm_slf_attn_bias2
- }
- return image, labels, loader
-
- def __call__(self, mode):
- image, labels, loader = self.create_feed(mode)
- if self.tps is None:
- inputs = image
- else:
- inputs = self.tps(image)
- conv_feas = self.backbone(inputs)
- predicts = self.head(conv_feas, labels, mode)
- decoded_out = predicts['decoded_out']
- if mode == "train":
- loss = self.loss(predicts, labels)
- if self.loss_type == "attention":
- label = labels['label_out']
- else:
- label = labels['label']
- if self.loss_type == 'srn':
- total_loss, img_loss, word_loss = self.loss(predicts, labels)
- outputs = {
- 'total_loss': total_loss,
- 'img_loss': img_loss,
- 'word_loss': word_loss,
- 'decoded_out': decoded_out,
- 'label': label
- }
- else:
- outputs = {'total_loss':loss, 'decoded_out':\
- decoded_out, 'label':label}
- return loader, outputs
-
- elif mode == "export":
- predict = predicts['predict']
- if self.loss_type == "ctc":
- predict = fluid.layers.softmax(predict)
- if self.loss_type == "srn":
- raise Exception(
- "Warning! SRN does not support export model currently")
- return [image, {'decoded_out': decoded_out, 'predicts': predict}]
- else:
- predict = predicts['predict']
- if self.loss_type == "ctc":
- predict = fluid.layers.softmax(predict)
- return loader, {'decoded_out': decoded_out, 'predicts': predict}
diff --git a/ppocr/modeling/backbones/__init__.py b/ppocr/modeling/backbones/__init__.py
index abf198b97e6e818e1fbe59006f98492640bcee54..9b873728286cfaab94ea0c8110a9e66929cda86b 100755
--- a/ppocr/modeling/backbones/__init__.py
+++ b/ppocr/modeling/backbones/__init__.py
@@ -11,3 +11,26 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+
+__all__ = ['build_backbone']
+
+
+def build_backbone(config, model_type):
+ if model_type == 'det':
+ from .det_mobilenet_v3 import MobileNetV3
+ from .det_resnet_vd import ResNet
+
+ support_dict = ['MobileNetV3', 'ResNet', 'ResNet_SAST']
+ elif model_type == 'rec':
+ from .rec_mobilenet_v3 import MobileNetV3
+ from .rec_resnet_vd import ResNet
+ support_dict = ['MobileNetV3', 'ResNet', 'ResNet_FPN']
+ else:
+ raise NotImplementedError
+
+ module_name = config.pop('name')
+ assert module_name in support_dict, Exception(
+ 'when model type is {}, backbone only supports {}'.format(model_type,
+ support_dict))
+ module_class = eval(module_name)(**config)
+ return module_class
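A sketch (not part of the patch): picking a backbone by name from a config dict, using the kwargs the rewritten `MobileNetV3` below accepts.

```python
from ppocr.modeling.backbones import build_backbone

backbone = build_backbone(
    {'name': 'MobileNetV3', 'in_channels': 3,
     'model_name': 'large', 'scale': 0.5},
    model_type='det')
print(backbone.out_channels)   # one entry per feature-map stage
```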
diff --git a/ppocr/modeling/backbones/det_mobilenet_v3.py b/ppocr/modeling/backbones/det_mobilenet_v3.py
index 87f5dd72452bbd4be96f8532ff6486c299318c5a..52dd34da61087f0b7b53724762d89c6db540b3fc 100755
--- a/ppocr/modeling/backbones/det_mobilenet_v3.py
+++ b/ppocr/modeling/backbones/det_mobilenet_v3.py
@@ -1,40 +1,48 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-import paddle.fluid as fluid
-from paddle.fluid.initializer import MSRA
-from paddle.fluid.param_attr import ParamAttr
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
__all__ = ['MobileNetV3']
-class MobileNetV3():
- def __init__(self, params):
+def make_divisible(v, divisor=8, min_value=None):
+ if min_value is None:
+ min_value = divisor
+ new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+ if new_v < 0.9 * v:
+ new_v += divisor
+ return new_v
+
+
+class MobileNetV3(nn.Layer):
+ def __init__(self, in_channels=3, model_name='large', scale=0.5, **kwargs):
"""
the MobilenetV3 backbone network for detection module.
Args:
params(dict): the super parameters for build network
"""
- self.scale = params['scale']
- model_name = params['model_name']
- self.inplanes = 16
+ super(MobileNetV3, self).__init__()
if model_name == "large":
- self.cfg = [
+ cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, False, 'relu', 1],
[3, 64, 24, False, 'relu', 2],
@@ -52,10 +60,9 @@ class MobileNetV3():
[5, 960, 160, True, 'hard_swish', 1],
[5, 960, 160, True, 'hard_swish', 1],
]
- self.cls_ch_squeeze = 960
- self.cls_ch_expand = 1280
+ cls_ch_squeeze = 960
elif model_name == "small":
- self.cfg = [
+ cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, True, 'relu', 2],
[3, 72, 24, False, 'relu', 2],
@@ -69,183 +76,203 @@ class MobileNetV3():
[5, 576, 96, True, 'hard_swish', 1],
[5, 576, 96, True, 'hard_swish', 1],
]
- self.cls_ch_squeeze = 576
- self.cls_ch_expand = 1280
+ cls_ch_squeeze = 576
else:
raise NotImplementedError("mode[" + model_name +
"_model] is not implemented!")
supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
- assert self.scale in supported_scale, \
- "supported scale are {} but input scale is {}".format(supported_scale, self.scale)
-
- def __call__(self, input):
- scale = self.scale
- inplanes = self.inplanes
- cfg = self.cfg
- cls_ch_squeeze = self.cls_ch_squeeze
- cls_ch_expand = self.cls_ch_expand
- #conv1
- conv = self.conv_bn_layer(
- input,
- filter_size=3,
- num_filters=self.make_divisible(inplanes * scale),
+ assert scale in supported_scale, \
+ "supported scale are {} but input scale is {}".format(supported_scale, scale)
+ inplanes = 16
+ # conv1
+ self.conv = ConvBNLayer(
+ in_channels=in_channels,
+ out_channels=make_divisible(inplanes * scale),
+ kernel_size=3,
stride=2,
padding=1,
- num_groups=1,
+ groups=1,
if_act=True,
act='hard_swish',
name='conv1')
+
+ self.stages = []
+ self.out_channels = []
+ block_list = []
i = 0
- inplanes = self.make_divisible(inplanes * scale)
- outs = []
- for layer_cfg in cfg:
- if layer_cfg[5] == 2 and i > 2:
- outs.append(conv)
- conv = self.residual_unit(
- input=conv,
- num_in_filter=inplanes,
- num_mid_filter=self.make_divisible(scale * layer_cfg[1]),
- num_out_filter=self.make_divisible(scale * layer_cfg[2]),
- act=layer_cfg[4],
- stride=layer_cfg[5],
- filter_size=layer_cfg[0],
- use_se=layer_cfg[3],
- name='conv' + str(i + 2))
- inplanes = self.make_divisible(scale * layer_cfg[2])
+ inplanes = make_divisible(inplanes * scale)
+ for (k, exp, c, se, nl, s) in cfg:
+ if s == 2 and i > 2:
+ self.out_channels.append(inplanes)
+ self.stages.append(nn.Sequential(*block_list))
+ block_list = []
+ block_list.append(
+ ResidualUnit(
+ in_channels=inplanes,
+ mid_channels=make_divisible(scale * exp),
+ out_channels=make_divisible(scale * c),
+ kernel_size=k,
+ stride=s,
+ use_se=se,
+ act=nl,
+ name="conv" + str(i + 2)))
+ inplanes = make_divisible(scale * c)
i += 1
+ block_list.append(
+ ConvBNLayer(
+ in_channels=inplanes,
+ out_channels=make_divisible(scale * cls_ch_squeeze),
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ groups=1,
+ if_act=True,
+ act='hard_swish',
+ name='conv_last'))
- conv = self.conv_bn_layer(
- input=conv,
- filter_size=1,
- num_filters=self.make_divisible(scale * cls_ch_squeeze),
- stride=1,
- padding=0,
- num_groups=1,
- if_act=True,
- act='hard_swish',
- name='conv_last')
- outs.append(conv)
- return outs
-
- def conv_bn_layer(self,
- input,
- filter_size,
- num_filters,
- stride,
- padding,
- num_groups=1,
- if_act=True,
- act=None,
- name=None,
- use_cudnn=True,
- res_last_bn_init=False):
- conv = fluid.layers.conv2d(
- input=input,
- num_filters=num_filters,
- filter_size=filter_size,
+ self.stages.append(nn.Sequential(*block_list))
+ self.out_channels.append(make_divisible(scale * cls_ch_squeeze))
+ for i, stage in enumerate(self.stages):
+ self.add_sublayer(sublayer=stage, name="stage{}".format(i))
+
+ def forward(self, x):
+ x = self.conv(x)
+ out_list = []
+ for stage in self.stages:
+ x = stage(x)
+ out_list.append(x)
+ return out_list
+
+
+class ConvBNLayer(nn.Layer):
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride,
+ padding,
+ groups=1,
+ if_act=True,
+ act=None,
+ name=None):
+ super(ConvBNLayer, self).__init__()
+ self.if_act = if_act
+ self.act = act
+ self.conv = nn.Conv2d(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
stride=stride,
padding=padding,
- groups=num_groups,
- act=None,
- use_cudnn=use_cudnn,
- param_attr=ParamAttr(name=name + '_weights'),
+ groups=groups,
+ weight_attr=ParamAttr(name=name + '_weights'),
bias_attr=False)
- bn_name = name + '_bn'
- bn = fluid.layers.batch_norm(
- input=conv,
- param_attr=ParamAttr(
- name=bn_name + "_scale",
- regularizer=fluid.regularizer.L2DecayRegularizer(
- regularization_coeff=0.0)),
- bias_attr=ParamAttr(
- name=bn_name + "_offset",
- regularizer=fluid.regularizer.L2DecayRegularizer(
- regularization_coeff=0.0)),
- moving_mean_name=bn_name + '_mean',
- moving_variance_name=bn_name + '_variance')
- if if_act:
- if act == 'relu':
- bn = fluid.layers.relu(bn)
- elif act == 'hard_swish':
- bn = fluid.layers.hard_swish(bn)
- return bn
-
- def make_divisible(self, v, divisor=8, min_value=None):
- if min_value is None:
- min_value = divisor
- new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
- if new_v < 0.9 * v:
- new_v += divisor
- return new_v
-
- def se_block(self, input, num_out_filter, ratio=4, name=None):
- num_mid_filter = num_out_filter // ratio
- pool = fluid.layers.pool2d(
- input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
- conv1 = fluid.layers.conv2d(
- input=pool,
- filter_size=1,
- num_filters=num_mid_filter,
- act='relu',
- param_attr=ParamAttr(name=name + '_1_weights'),
- bias_attr=ParamAttr(name=name + '_1_offset'))
- conv2 = fluid.layers.conv2d(
- input=conv1,
- filter_size=1,
- num_filters=num_out_filter,
- act='hard_sigmoid',
- param_attr=ParamAttr(name=name + '_2_weights'),
- bias_attr=ParamAttr(name=name + '_2_offset'))
- scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
- return scale
-
- def residual_unit(self,
- input,
- num_in_filter,
- num_mid_filter,
- num_out_filter,
- stride,
- filter_size,
- act=None,
- use_se=False,
- name=None):
-
- conv0 = self.conv_bn_layer(
- input=input,
- filter_size=1,
- num_filters=num_mid_filter,
+
+ self.bn = nn.BatchNorm(
+ num_channels=out_channels,
+ act=None,
+ param_attr=ParamAttr(name=name + "_bn_scale"),
+ bias_attr=ParamAttr(name=name + "_bn_offset"),
+ moving_mean_name=name + "_bn_mean",
+ moving_variance_name=name + "_bn_variance")
+
+ def forward(self, x):
+ x = self.conv(x)
+ x = self.bn(x)
+ if self.if_act:
+ if self.act == "relu":
+ x = F.relu(x)
+ elif self.act == "hard_swish":
+ x = F.hard_swish(x)
+ else:
+ print("The activation function is selected incorrectly.")
+ exit()
+ return x
+
+
+class ResidualUnit(nn.Layer):
+ def __init__(self,
+ in_channels,
+ mid_channels,
+ out_channels,
+ kernel_size,
+ stride,
+ use_se,
+ act=None,
+ name=''):
+ super(ResidualUnit, self).__init__()
+ self.if_shortcut = stride == 1 and in_channels == out_channels
+ self.if_se = use_se
+
+ self.expand_conv = ConvBNLayer(
+ in_channels=in_channels,
+ out_channels=mid_channels,
+ kernel_size=1,
stride=1,
padding=0,
if_act=True,
act=act,
- name=name + '_expand')
-
- conv1 = self.conv_bn_layer(
- input=conv0,
- filter_size=filter_size,
- num_filters=num_mid_filter,
+ name=name + "_expand")
+ self.bottleneck_conv = ConvBNLayer(
+ in_channels=mid_channels,
+ out_channels=mid_channels,
+ kernel_size=kernel_size,
stride=stride,
- padding=int((filter_size - 1) // 2),
+ padding=int((kernel_size - 1) // 2),
+ groups=mid_channels,
if_act=True,
act=act,
- num_groups=num_mid_filter,
- use_cudnn=False,
- name=name + '_depthwise')
- if use_se:
- conv1 = self.se_block(
- input=conv1, num_out_filter=num_mid_filter, name=name + '_se')
-
- conv2 = self.conv_bn_layer(
- input=conv1,
- filter_size=1,
- num_filters=num_out_filter,
+ name=name + "_depthwise")
+ if self.if_se:
+ self.mid_se = SEModule(mid_channels, name=name + "_se")
+ self.linear_conv = ConvBNLayer(
+ in_channels=mid_channels,
+ out_channels=out_channels,
+ kernel_size=1,
stride=1,
padding=0,
if_act=False,
- name=name + '_linear',
- res_last_bn_init=True)
- if num_in_filter != num_out_filter or stride != 1:
- return conv2
- else:
- return fluid.layers.elementwise_add(x=input, y=conv2, act=None)
+ act=None,
+ name=name + "_linear")
+
+ def forward(self, inputs):
+ x = self.expand_conv(inputs)
+ x = self.bottleneck_conv(x)
+ if self.if_se:
+ x = self.mid_se(x)
+ x = self.linear_conv(x)
+ if self.if_shortcut:
+ x = paddle.elementwise_add(inputs, x)
+ return x
+
+
+class SEModule(nn.Layer):
+ def __init__(self, in_channels, reduction=4, name=""):
+ super(SEModule, self).__init__()
+ self.avg_pool = nn.Pool2D(
+ pool_type="avg", global_pooling=True, use_cudnn=False)
+ self.conv1 = nn.Conv2d(
+ in_channels=in_channels,
+ out_channels=in_channels // reduction,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ weight_attr=ParamAttr(name=name + "_1_weights"),
+ bias_attr=ParamAttr(name=name + "_1_offset"))
+ self.conv2 = nn.Conv2d(
+ in_channels=in_channels // reduction,
+ out_channels=in_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ weight_attr=ParamAttr(name + "_2_weights"),
+ bias_attr=ParamAttr(name=name + "_2_offset"))
+
+ def forward(self, inputs):
+ outputs = self.avg_pool(inputs)
+ outputs = self.conv1(outputs)
+ outputs = F.relu(outputs)
+ outputs = self.conv2(outputs)
+ outputs = F.hard_sigmoid(outputs)
+ return inputs * outputs
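A minimal dygraph smoke test (not part of the patch) for the rewritten backbone. It assumes the 2.0-beta API used elsewhere in this diff (`paddle.disable_static`, `paddle.to_variable`); the backbone now returns one feature map per stage instead of threading a single tensor through `__call__`.

```python
import numpy as np
import paddle
from ppocr.modeling.backbones.det_mobilenet_v3 import MobileNetV3

paddle.disable_static()                  # dygraph mode, as used in this patch
net = MobileNetV3(in_channels=3, model_name='large', scale=0.5)
x = paddle.to_variable(np.zeros((1, 3, 640, 640), dtype=np.float32))
for feat in net(x):                      # one feature map per downsampling stage
    print(feat.shape)
print(net.out_channels)                  # channel count of each returned map
```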
diff --git a/ppocr/modeling/backbones/det_resnet_vd.py b/ppocr/modeling/backbones/det_resnet_vd.py
old mode 100755
new mode 100644
index 52a441f312c139a5df5b35640db318280bc0fc4c..b501bec8486b2982edcaa4550c952c557132a681
--- a/ppocr/modeling/backbones/det_resnet_vd.py
+++ b/ppocr/modeling/backbones/det_resnet_vd.py
@@ -1,252 +1,329 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
+from paddle import nn
+from paddle.nn import functional as F
+from paddle import ParamAttr
__all__ = ["ResNet"]
-class ResNet(object):
- def __init__(self, params):
+class ResNet(nn.Layer):
+ def __init__(self, in_channels=3, layers=50, **kwargs):
"""
the Resnet backbone network for detection module.
Args:
params(dict): the super parameters for network build
"""
- self.layers = params['layers']
- supported_layers = [18, 34, 50, 101, 152]
- assert self.layers in supported_layers, \
- "supported layers are {} but input layer is {}".format(supported_layers, self.layers)
- self.is_3x3 = True
-
- def __call__(self, input):
- layers = self.layers
- is_3x3 = self.is_3x3
- if layers == 18:
- depth = [2, 2, 2, 2]
- elif layers == 34 or layers == 50:
- depth = [3, 4, 6, 3]
- elif layers == 101:
- depth = [3, 4, 23, 3]
- elif layers == 152:
- depth = [3, 8, 36, 3]
- elif layers == 200:
- depth = [3, 12, 48, 3]
+ super(ResNet, self).__init__()
+ supported_layers = {
+ 18: {
+ 'depth': [2, 2, 2, 2],
+ 'block_class': BasicBlock
+ },
+ 34: {
+ 'depth': [3, 4, 6, 3],
+ 'block_class': BasicBlock
+ },
+ 50: {
+ 'depth': [3, 4, 6, 3],
+ 'block_class': BottleneckBlock
+ },
+ 101: {
+ 'depth': [3, 4, 23, 3],
+ 'block_class': BottleneckBlock
+ },
+ 152: {
+ 'depth': [3, 8, 36, 3],
+ 'block_class': BottleneckBlock
+ },
+ 200: {
+ 'depth': [3, 12, 48, 3],
+ 'block_class': BottleneckBlock
+ }
+ }
+ assert layers in supported_layers, \
+ "supported layers are {} but input layer is {}".format(supported_layers.keys(), layers)
+ is_3x3 = True
+
+ depth = supported_layers[layers]['depth']
+ block_class = supported_layers[layers]['block_class']
+
num_filters = [64, 128, 256, 512]
- outs = []
+ conv = []
if is_3x3 == False:
- conv = self.conv_bn_layer(
- input=input,
- num_filters=64,
- filter_size=7,
- stride=2,
- act='relu')
+ conv.append(
+ ConvBNLayer(
+ in_channels=in_channels,
+ out_channels=64,
+ kernel_size=7,
+ stride=2,
+ act='relu'))
else:
- conv = self.conv_bn_layer(
- input=input,
- num_filters=32,
- filter_size=3,
- stride=2,
- act='relu',
- name='conv1_1')
- conv = self.conv_bn_layer(
- input=conv,
- num_filters=32,
- filter_size=3,
- stride=1,
- act='relu',
- name='conv1_2')
- conv = self.conv_bn_layer(
- input=conv,
- num_filters=64,
- filter_size=3,
- stride=1,
- act='relu',
- name='conv1_3')
-
- conv = fluid.layers.pool2d(
- input=conv,
- pool_size=3,
- pool_stride=2,
- pool_padding=1,
- pool_type='max')
-
- if layers >= 50:
- for block in range(len(depth)):
- for i in range(depth[block]):
- if layers in [101, 152, 200] and block == 2:
+ conv.append(
+ ConvBNLayer(
+                    in_channels=in_channels,
+ out_channels=32,
+ kernel_size=3,
+ stride=2,
+ act='relu',
+ name='conv1_1'))
+ conv.append(
+ ConvBNLayer(
+ in_channels=32,
+ out_channels=32,
+ kernel_size=3,
+ stride=1,
+ act='relu',
+ name='conv1_2'))
+ conv.append(
+ ConvBNLayer(
+ in_channels=32,
+ out_channels=64,
+ kernel_size=3,
+ stride=1,
+ act='relu',
+ name='conv1_3'))
+ self.conv1 = nn.Sequential(*conv)
+ self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+ self.stages = []
+ self.out_channels = []
+ in_ch = 64
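+        # build the residual stages, tracking the running channel count so each
+        # block knows its input width and the neck can read self.out_channels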
+ for block_index in range(len(depth)):
+ block_list = []
+ for i in range(depth[block_index]):
+ if layers >= 50:
+ if layers in [101, 152, 200] and block_index == 2:
if i == 0:
- conv_name = "res" + str(block + 2) + "a"
+ conv_name = "res" + str(block_index + 2) + "a"
else:
- conv_name = "res" + str(block + 2) + "b" + str(i)
+ conv_name = "res" + str(block_index +
+ 2) + "b" + str(i)
else:
- conv_name = "res" + str(block + 2) + chr(97 + i)
- conv = self.bottleneck_block(
- input=conv,
- num_filters=num_filters[block],
- stride=2 if i == 0 and block != 0 else 1,
- if_first=block == i == 0,
- name=conv_name)
- outs.append(conv)
- else:
- for block in range(len(depth)):
- for i in range(depth[block]):
- conv_name = "res" + str(block + 2) + chr(97 + i)
- conv = self.basic_block(
- input=conv,
- num_filters=num_filters[block],
- stride=2 if i == 0 and block != 0 else 1,
- if_first=block == i == 0,
- name=conv_name)
- outs.append(conv)
- return outs
-
- def conv_bn_layer(self,
- input,
- num_filters,
- filter_size,
- stride=1,
- groups=1,
- act=None,
- name=None):
- conv = fluid.layers.conv2d(
- input=input,
- num_filters=num_filters,
- filter_size=filter_size,
+ conv_name = "res" + str(block_index + 2) + chr(97 + i)
+ else:
+ conv_name = "res" + str(block_index + 2) + chr(97 + i)
+ block_list.append(
+ block_class(
+ in_channels=in_ch,
+ out_channels=num_filters[block_index],
+ stride=2 if i == 0 and block_index != 0 else 1,
+ if_first=block_index == i == 0,
+ name=conv_name))
+ in_ch = block_list[-1].out_channels
+ self.out_channels.append(in_ch)
+ self.stages.append(nn.Sequential(*block_list))
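+        # self.stages is a plain Python list, so each stage must be registered
+        # explicitly for its parameters to be visible to the Layer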
+ for i, stage in enumerate(self.stages):
+ self.add_sublayer(sublayer=stage, name="stage{}".format(i))
+
+ def forward(self, x):
+ x = self.conv1(x)
+ x = self.pool(x)
+ out_list = []
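+        # collect one feature map per stage (C2..C5) for the detection neck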
+ for stage in self.stages:
+ x = stage(x)
+ out_list.append(x)
+ return out_list
+
+
+class ConvBNLayer(nn.Layer):
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=1,
+ groups=1,
+ act=None,
+ name=None):
+ super(ConvBNLayer, self).__init__()
+ self.conv = nn.Conv2d(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
stride=stride,
- padding=(filter_size - 1) // 2,
+ padding=(kernel_size - 1) // 2,
groups=groups,
- act=None,
- param_attr=ParamAttr(name=name + "_weights"),
+ weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
- return fluid.layers.batch_norm(
- input=conv,
+ self.bn = nn.BatchNorm(
+ num_channels=out_channels,
act=act,
- param_attr=ParamAttr(name=bn_name + '_scale'),
- bias_attr=ParamAttr(bn_name + '_offset'),
- moving_mean_name=bn_name + '_mean',
- moving_variance_name=bn_name + '_variance')
-
- def conv_bn_layer_new(self,
- input,
- num_filters,
- filter_size,
- stride=1,
- groups=1,
- act=None,
- name=None):
- pool = fluid.layers.pool2d(
- input=input,
- pool_size=2,
- pool_stride=2,
- pool_padding=0,
- pool_type='avg',
- ceil_mode=True)
-
- conv = fluid.layers.conv2d(
- input=pool,
- num_filters=num_filters,
- filter_size=filter_size,
+ param_attr=ParamAttr(name=bn_name + "_scale"),
+ bias_attr=ParamAttr(name=bn_name + "_offset"),
+ moving_mean_name=bn_name + "_mean",
+ moving_variance_name=bn_name + "_variance")
+
+    def forward(self, x):
+ x = self.conv(x)
+ x = self.bn(x)
+ return x
+
+
+class ConvBNLayerNew(nn.Layer):
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=1,
+ groups=1,
+ act=None,
+ name=None):
+ super(ConvBNLayerNew, self).__init__()
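+        # ResNet-vd downsampling: average-pool first, then a 1x1 conv, instead of
+        # a strided 1x1 conv that would discard three quarters of the input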
+ self.pool = nn.AvgPool2d(
+ kernel_size=2, stride=2, padding=0, ceil_mode=True)
+
+ self.conv = nn.Conv2d(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
stride=1,
- padding=(filter_size - 1) // 2,
+ padding=(kernel_size - 1) // 2,
groups=groups,
- act=None,
- param_attr=ParamAttr(name=name + "_weights"),
+ weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
- return fluid.layers.batch_norm(
- input=conv,
+ self.bn = nn.BatchNorm(
+ num_channels=out_channels,
act=act,
- param_attr=ParamAttr(name=bn_name + '_scale'),
- bias_attr=ParamAttr(bn_name + '_offset'),
- moving_mean_name=bn_name + '_mean',
- moving_variance_name=bn_name + '_variance')
-
- def shortcut(self, input, ch_out, stride, name, if_first=False):
- ch_in = input.shape[1]
- if ch_in != ch_out or stride != 1:
+ param_attr=ParamAttr(name=bn_name + "_scale"),
+ bias_attr=ParamAttr(name=bn_name + "_offset"),
+ moving_mean_name=bn_name + "_mean",
+ moving_variance_name=bn_name + "_variance")
+
+    def forward(self, x):
+ x = self.pool(x)
+ x = self.conv(x)
+ x = self.bn(x)
+ return x
+
+
+class ShortCut(nn.Layer):
+ def __init__(self, in_channels, out_channels, stride, name, if_first=False):
+ super(ShortCut, self).__init__()
+ self.use_conv = True
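+        # a 1x1 projection is needed whenever channel count or stride changes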
+ if in_channels != out_channels or stride != 1:
if if_first:
- return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
+ self.conv = ConvBNLayer(
+ in_channels, out_channels, 1, stride, name=name)
else:
- return self.conv_bn_layer_new(
- input, ch_out, 1, stride, name=name)
+ self.conv = ConvBNLayerNew(
+ in_channels, out_channels, 1, stride, name=name)
elif if_first:
- return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
+ self.conv = ConvBNLayer(
+ in_channels, out_channels, 1, stride, name=name)
else:
- return input
+ self.use_conv = False
+
+ def forward(self, x):
+ if self.use_conv:
+ x = self.conv(x)
+ return x
- def bottleneck_block(self, input, num_filters, stride, name, if_first):
- conv0 = self.conv_bn_layer(
- input=input,
- num_filters=num_filters,
- filter_size=1,
+
+class BottleneckBlock(nn.Layer):
+ def __init__(self, in_channels, out_channels, stride, name, if_first):
+ super(BottleneckBlock, self).__init__()
+ self.conv0 = ConvBNLayer(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ kernel_size=1,
act='relu',
name=name + "_branch2a")
- conv1 = self.conv_bn_layer(
- input=conv0,
- num_filters=num_filters,
- filter_size=3,
+ self.conv1 = ConvBNLayer(
+ in_channels=out_channels,
+ out_channels=out_channels,
+ kernel_size=3,
stride=stride,
act='relu',
name=name + "_branch2b")
- conv2 = self.conv_bn_layer(
- input=conv1,
- num_filters=num_filters * 4,
- filter_size=1,
+ self.conv2 = ConvBNLayer(
+ in_channels=out_channels,
+ out_channels=out_channels * 4,
+ kernel_size=1,
act=None,
name=name + "_branch2c")
- short = self.shortcut(
- input,
- num_filters * 4,
- stride,
+ self.short = ShortCut(
+ in_channels=in_channels,
+ out_channels=out_channels * 4,
+ stride=stride,
if_first=if_first,
name=name + "_branch1")
+ self.out_channels = out_channels * 4
+
+ def forward(self, x):
+ y = self.conv0(x)
+ y = self.conv1(y)
+ y = self.conv2(y)
+ y = y + self.short(x)
+ y = F.relu(y)
+ return y
- return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
- def basic_block(self, input, num_filters, stride, name, if_first):
- conv0 = self.conv_bn_layer(
- input=input,
- num_filters=num_filters,
- filter_size=3,
+class BasicBlock(nn.Layer):
+ def __init__(self, in_channels, out_channels, stride, name, if_first):
+ super(BasicBlock, self).__init__()
+ self.conv0 = ConvBNLayer(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ kernel_size=3,
act='relu',
stride=stride,
name=name + "_branch2a")
- conv1 = self.conv_bn_layer(
- input=conv0,
- num_filters=num_filters,
- filter_size=3,
+ self.conv1 = ConvBNLayer(
+ in_channels=out_channels,
+ out_channels=out_channels,
+ kernel_size=3,
act=None,
name=name + "_branch2b")
- short = self.shortcut(
- input,
- num_filters,
- stride,
+ self.short = ShortCut(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ stride=stride,
if_first=if_first,
name=name + "_branch1")
- return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
+ self.out_channels = out_channels
+
+ def forward(self, x):
+ y = self.conv0(x)
+ y = self.conv1(y)
+ y = y + self.short(x)
+ return F.relu(y)
+
+
+if __name__ == '__main__':
+ import paddle
+
+ paddle.disable_static()
+    # paddle.zeros already returns a dygraph Tensor once static mode is disabled
+    x = paddle.zeros([1, 3, 640, 640])
+ net = ResNet(layers=18)
+ y = net(x)
+
+ for stage in y:
+ print(stage.shape)
diff --git a/ppocr/modeling/backbones/det_resnet_vd_sast.py b/ppocr/modeling/backbones/det_resnet_vd_sast.py
deleted file mode 100644
index 14fe713852d009bef8dd7f3739c2e8070789e359..0000000000000000000000000000000000000000
--- a/ppocr/modeling/backbones/det_resnet_vd_sast.py
+++ /dev/null
@@ -1,274 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
-
-__all__ = ["ResNet"]
-
-
-class ResNet(object):
- def __init__(self, params):
- """
- the Resnet backbone network for detection module.
- Args:
- params(dict): the super parameters for network build
- """
- self.layers = params['layers']
- supported_layers = [18, 34, 50, 101, 152]
- assert self.layers in supported_layers, \
- "supported layers are {} but input layer is {}".format(supported_layers, self.layers)
- self.is_3x3 = True
-
- def __call__(self, input):
- layers = self.layers
- is_3x3 = self.is_3x3
- # if layers == 18:
- # depth = [2, 2, 2, 2]
- # elif layers == 34 or layers == 50:
- # depth = [3, 4, 6, 3]
- # elif layers == 101:
- # depth = [3, 4, 23, 3]
- # elif layers == 152:
- # depth = [3, 8, 36, 3]
- # elif layers == 200:
- # depth = [3, 12, 48, 3]
- # num_filters = [64, 128, 256, 512]
- # outs = []
-
- if layers == 18:
- depth = [2, 2, 2, 2]#, 3, 3]
- elif layers == 34 or layers == 50:
- #depth = [3, 4, 6, 3]#, 3, 3]
- depth = [3, 4, 6, 3, 3]#, 3]
- elif layers == 101:
- depth = [3, 4, 23, 3]#, 3, 3]
- elif layers == 152:
- depth = [3, 8, 36, 3]#, 3, 3]
- num_filters = [64, 128, 256, 512, 512]#, 512]
- blocks = {}
-
- idx = 'block_0'
- blocks[idx] = input
-
- if is_3x3 == False:
- conv = self.conv_bn_layer(
- input=input,
- num_filters=64,
- filter_size=7,
- stride=2,
- act='relu')
- else:
- conv = self.conv_bn_layer(
- input=input,
- num_filters=32,
- filter_size=3,
- stride=2,
- act='relu',
- name='conv1_1')
- conv = self.conv_bn_layer(
- input=conv,
- num_filters=32,
- filter_size=3,
- stride=1,
- act='relu',
- name='conv1_2')
- conv = self.conv_bn_layer(
- input=conv,
- num_filters=64,
- filter_size=3,
- stride=1,
- act='relu',
- name='conv1_3')
- idx = 'block_1'
- blocks[idx] = conv
-
- conv = fluid.layers.pool2d(
- input=conv,
- pool_size=3,
- pool_stride=2,
- pool_padding=1,
- pool_type='max')
-
- if layers >= 50:
- for block in range(len(depth)):
- for i in range(depth[block]):
- if layers in [101, 152, 200] and block == 2:
- if i == 0:
- conv_name = "res" + str(block + 2) + "a"
- else:
- conv_name = "res" + str(block + 2) + "b" + str(i)
- else:
- conv_name = "res" + str(block + 2) + chr(97 + i)
- conv = self.bottleneck_block(
- input=conv,
- num_filters=num_filters[block],
- stride=2 if i == 0 and block != 0 else 1,
- if_first=block == i == 0,
- name=conv_name)
- # outs.append(conv)
- idx = 'block_' + str(block + 2)
- blocks[idx] = conv
- else:
- for block in range(len(depth)):
- for i in range(depth[block]):
- conv_name = "res" + str(block + 2) + chr(97 + i)
- conv = self.basic_block(
- input=conv,
- num_filters=num_filters[block],
- stride=2 if i == 0 and block != 0 else 1,
- if_first=block == i == 0,
- name=conv_name)
- # outs.append(conv)
- idx = 'block_' + str(block + 2)
- blocks[idx] = conv
- # return outs
- return blocks
-
- def conv_bn_layer(self,
- input,
- num_filters,
- filter_size,
- stride=1,
- groups=1,
- act=None,
- name=None):
- conv = fluid.layers.conv2d(
- input=input,
- num_filters=num_filters,
- filter_size=filter_size,
- stride=stride,
- padding=(filter_size - 1) // 2,
- groups=groups,
- act=None,
- param_attr=ParamAttr(name=name + "_weights"),
- bias_attr=False)
- if name == "conv1":
- bn_name = "bn_" + name
- else:
- bn_name = "bn" + name[3:]
- return fluid.layers.batch_norm(
- input=conv,
- act=act,
- param_attr=ParamAttr(name=bn_name + '_scale'),
- bias_attr=ParamAttr(bn_name + '_offset'),
- moving_mean_name=bn_name + '_mean',
- moving_variance_name=bn_name + '_variance')
-
- def conv_bn_layer_new(self,
- input,
- num_filters,
- filter_size,
- stride=1,
- groups=1,
- act=None,
- name=None):
- pool = fluid.layers.pool2d(
- input=input,
- pool_size=2,
- pool_stride=2,
- pool_padding=0,
- pool_type='avg',
- ceil_mode=True)
-
- conv = fluid.layers.conv2d(
- input=pool,
- num_filters=num_filters,
- filter_size=filter_size,
- stride=1,
- padding=(filter_size - 1) // 2,
- groups=groups,
- act=None,
- param_attr=ParamAttr(name=name + "_weights"),
- bias_attr=False)
- if name == "conv1":
- bn_name = "bn_" + name
- else:
- bn_name = "bn" + name[3:]
- return fluid.layers.batch_norm(
- input=conv,
- act=act,
- param_attr=ParamAttr(name=bn_name + '_scale'),
- bias_attr=ParamAttr(bn_name + '_offset'),
- moving_mean_name=bn_name + '_mean',
- moving_variance_name=bn_name + '_variance')
-
- def shortcut(self, input, ch_out, stride, name, if_first=False):
- ch_in = input.shape[1]
- if ch_in != ch_out or stride != 1:
- if if_first:
- return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
- else:
- return self.conv_bn_layer_new(
- input, ch_out, 1, stride, name=name)
- elif if_first:
- return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
- else:
- return input
-
- def bottleneck_block(self, input, num_filters, stride, name, if_first):
- conv0 = self.conv_bn_layer(
- input=input,
- num_filters=num_filters,
- filter_size=1,
- act='relu',
- name=name + "_branch2a")
- conv1 = self.conv_bn_layer(
- input=conv0,
- num_filters=num_filters,
- filter_size=3,
- stride=stride,
- act='relu',
- name=name + "_branch2b")
- conv2 = self.conv_bn_layer(
- input=conv1,
- num_filters=num_filters * 4,
- filter_size=1,
- act=None,
- name=name + "_branch2c")
-
- short = self.shortcut(
- input,
- num_filters * 4,
- stride,
- if_first=if_first,
- name=name + "_branch1")
-
- return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
-
- def basic_block(self, input, num_filters, stride, name, if_first):
- conv0 = self.conv_bn_layer(
- input=input,
- num_filters=num_filters,
- filter_size=3,
- act='relu',
- stride=stride,
- name=name + "_branch2a")
- conv1 = self.conv_bn_layer(
- input=conv0,
- num_filters=num_filters,
- filter_size=3,
- act=None,
- name=name + "_branch2b")
- short = self.shortcut(
- input,
- num_filters,
- stride,
- if_first=if_first,
- name=name + "_branch1")
- return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
diff --git a/ppocr/modeling/backbones/rec_mobilenet_v3.py b/ppocr/modeling/backbones/rec_mobilenet_v3.py
old mode 100755
new mode 100644
index ff39a81210b7b71914f3c447b5e0035ac03db73b..bcba860022a707ea1569340c6344b662440a8dc5
--- a/ppocr/modeling/backbones/rec_mobilenet_v3.py
+++ b/ppocr/modeling/backbones/rec_mobilenet_v3.py
@@ -1,53 +1,49 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+from paddle import nn
-import paddle.fluid as fluid
-from paddle.fluid.initializer import MSRA
-from paddle.fluid.param_attr import ParamAttr
+from ppocr.modeling.backbones.det_mobilenet_v3 import ResidualUnit, ConvBNLayer, make_divisible
-__all__ = [
- 'MobileNetV3', 'MobileNetV3_small_x0_35', 'MobileNetV3_small_x0_5',
- 'MobileNetV3_small_x0_75', 'MobileNetV3_small_x1_0',
- 'MobileNetV3_small_x1_25', 'MobileNetV3_large_x0_35',
- 'MobileNetV3_large_x0_5', 'MobileNetV3_large_x0_75',
- 'MobileNetV3_large_x1_0', 'MobileNetV3_large_x1_25'
-]
+__all__ = ['MobileNetV3']
-class MobileNetV3():
- def __init__(self, params):
- self.scale = params.get("scale", 0.5)
- model_name = params.get("model_name", "small")
- large_stride = params.get("large_stride", [1, 2, 2, 2])
- small_stride = params.get("small_stride", [2, 2, 2, 2])
+class MobileNetV3(nn.Layer):
+ def __init__(self,
+ in_channels=3,
+ model_name='small',
+ scale=0.5,
+ large_stride=None,
+ small_stride=None,
+ **kwargs):
+ super(MobileNetV3, self).__init__()
+ if small_stride is None:
+ small_stride = [2, 2, 2, 2]
+ if large_stride is None:
+ large_stride = [1, 2, 2, 2]
assert isinstance(large_stride, list), "large_stride type must " \
- "be list but got {}".format(type(large_stride))
+ "be list but got {}".format(type(large_stride))
assert isinstance(small_stride, list), "small_stride type must " \
- "be list but got {}".format(type(small_stride))
+ "be list but got {}".format(type(small_stride))
assert len(large_stride) == 4, "large_stride length must be " \
- "4 but got {}".format(len(large_stride))
+ "4 but got {}".format(len(large_stride))
assert len(small_stride) == 4, "small_stride length must be " \
- "4 but got {}".format(len(small_stride))
+ "4 but got {}".format(len(small_stride))
- self.inplanes = 16
if model_name == "large":
- self.cfg = [
+ cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, False, 'relu', large_stride[0]],
[3, 64, 24, False, 'relu', (large_stride[1], 1)],
@@ -65,10 +61,9 @@ class MobileNetV3():
[5, 960, 160, True, 'hard_swish', 1],
[5, 960, 160, True, 'hard_swish', 1],
]
- self.cls_ch_squeeze = 960
- self.cls_ch_expand = 1280
+ cls_ch_squeeze = 960
elif model_name == "small":
- self.cfg = [
+ cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, True, 'relu', (small_stride[0], 1)],
[3, 72, 24, False, 'relu', (small_stride[1], 1)],
@@ -82,186 +77,72 @@ class MobileNetV3():
[5, 576, 96, True, 'hard_swish', 1],
[5, 576, 96, True, 'hard_swish', 1],
]
- self.cls_ch_squeeze = 576
- self.cls_ch_expand = 1280
+ cls_ch_squeeze = 576
else:
raise NotImplementedError("mode[" + model_name +
"_model] is not implemented!")
supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
- assert self.scale in supported_scale, \
- "supported scales are {} but input scale is {}".format(supported_scale, self.scale)
-
- def __call__(self, input):
- scale = self.scale
- inplanes = self.inplanes
- cfg = self.cfg
- cls_ch_squeeze = self.cls_ch_squeeze
- cls_ch_expand = self.cls_ch_expand
- #conv1
- conv = self.conv_bn_layer(
- input,
- filter_size=3,
- num_filters=self.make_divisible(inplanes * scale),
+ assert scale in supported_scale, \
+ "supported scales are {} but input scale is {}".format(supported_scale, scale)
+
+ inplanes = 16
+ # conv1
+ self.conv1 = ConvBNLayer(
+ in_channels=in_channels,
+ out_channels=make_divisible(inplanes * scale),
+ kernel_size=3,
stride=2,
padding=1,
- num_groups=1,
+ groups=1,
if_act=True,
act='hard_swish',
name='conv1')
i = 0
- inplanes = self.make_divisible(inplanes * scale)
- for layer_cfg in cfg:
- conv = self.residual_unit(
- input=conv,
- num_in_filter=inplanes,
- num_mid_filter=self.make_divisible(scale * layer_cfg[1]),
- num_out_filter=self.make_divisible(scale * layer_cfg[2]),
- act=layer_cfg[4],
- stride=layer_cfg[5],
- filter_size=layer_cfg[0],
- use_se=layer_cfg[3],
- name='conv' + str(i + 2))
- inplanes = self.make_divisible(scale * layer_cfg[2])
+ block_list = []
+ inplanes = make_divisible(inplanes * scale)
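+        # each cfg row is (kernel, expansion, out_channels, use_se, act, stride)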
+ for (k, exp, c, se, nl, s) in cfg:
+ block_list.append(
+ ResidualUnit(
+ in_channels=inplanes,
+ mid_channels=make_divisible(scale * exp),
+ out_channels=make_divisible(scale * c),
+ kernel_size=k,
+ stride=s,
+ use_se=se,
+ act=nl,
+ name='conv' + str(i + 2)))
+ inplanes = make_divisible(scale * c)
i += 1
+ self.blocks = nn.Sequential(*block_list)
- conv = self.conv_bn_layer(
- input=conv,
- filter_size=1,
- num_filters=self.make_divisible(scale * cls_ch_squeeze),
+ self.conv2 = ConvBNLayer(
+ in_channels=inplanes,
+ out_channels=make_divisible(scale * cls_ch_squeeze),
+ kernel_size=1,
stride=1,
padding=0,
- num_groups=1,
+ groups=1,
if_act=True,
act='hard_swish',
name='conv_last')
- conv = fluid.layers.pool2d(
- input=conv,
- pool_size=2,
- pool_stride=2,
- pool_padding=0,
- pool_type='max')
- return conv
-
- def conv_bn_layer(self,
- input,
- filter_size,
- num_filters,
- stride,
- padding,
- num_groups=1,
- if_act=True,
- act=None,
- name=None,
- use_cudnn=True,
- res_last_bn_init=False):
- conv = fluid.layers.conv2d(
- input=input,
- num_filters=num_filters,
- filter_size=filter_size,
- stride=stride,
- padding=padding,
- groups=num_groups,
- act=None,
- use_cudnn=use_cudnn,
- param_attr=ParamAttr(name=name + '_weights'),
- bias_attr=False)
- bn_name = name + '_bn'
- bn = fluid.layers.batch_norm(
- input=conv,
- param_attr=ParamAttr(
- name=bn_name + "_scale",
- regularizer=fluid.regularizer.L2DecayRegularizer(
- regularization_coeff=0.0)),
- bias_attr=ParamAttr(
- name=bn_name + "_offset",
- regularizer=fluid.regularizer.L2DecayRegularizer(
- regularization_coeff=0.0)),
- moving_mean_name=bn_name + '_mean',
- moving_variance_name=bn_name + '_variance')
- if if_act:
- if act == 'relu':
- bn = fluid.layers.relu(bn)
- elif act == 'hard_swish':
- bn = fluid.layers.hard_swish(bn)
- return bn
-
- def make_divisible(self, v, divisor=8, min_value=None):
- if min_value is None:
- min_value = divisor
- new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
- if new_v < 0.9 * v:
- new_v += divisor
- return new_v
-
- def se_block(self, input, num_out_filter, ratio=4, name=None):
- num_mid_filter = num_out_filter // ratio
- pool = fluid.layers.pool2d(
- input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
- conv1 = fluid.layers.conv2d(
- input=pool,
- filter_size=1,
- num_filters=num_mid_filter,
- act='relu',
- param_attr=ParamAttr(name=name + '_1_weights'),
- bias_attr=ParamAttr(name=name + '_1_offset'))
- conv2 = fluid.layers.conv2d(
- input=conv1,
- filter_size=1,
- num_filters=num_out_filter,
- act='hard_sigmoid',
- param_attr=ParamAttr(name=name + '_2_weights'),
- bias_attr=ParamAttr(name=name + '_2_offset'))
- scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
- return scale
-
- def residual_unit(self,
- input,
- num_in_filter,
- num_mid_filter,
- num_out_filter,
- stride,
- filter_size,
- act=None,
- use_se=False,
- name=None):
-
- conv0 = self.conv_bn_layer(
- input=input,
- filter_size=1,
- num_filters=num_mid_filter,
- stride=1,
- padding=0,
- if_act=True,
- act=act,
- name=name + '_expand')
-
- conv1 = self.conv_bn_layer(
- input=conv0,
- filter_size=filter_size,
- num_filters=num_mid_filter,
- stride=stride,
- padding=int((filter_size - 1) // 2),
- if_act=True,
- act=act,
- num_groups=num_mid_filter,
- use_cudnn=False,
- name=name + '_depthwise')
- if use_se:
- conv1 = self.se_block(
- input=conv1, num_out_filter=num_mid_filter, name=name + '_se')
-
- conv2 = self.conv_bn_layer(
- input=conv1,
- filter_size=1,
- num_filters=num_out_filter,
- stride=1,
- padding=0,
- if_act=False,
- name=name + '_linear',
- res_last_bn_init=True)
- if num_in_filter != num_out_filter or stride != 1:
- return conv2
- else:
- return fluid.layers.elementwise_add(x=input, y=conv2, act=None)
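+        # a trailing 2x2 max-pool compresses the feature map before the sequence head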
+ self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
+ self.out_channels = make_divisible(scale * cls_ch_squeeze)
+
+ def forward(self, x):
+ x = self.conv1(x)
+ x = self.blocks(x)
+ x = self.conv2(x)
+ x = self.pool(x)
+ return x
+
+
+if __name__ == '__main__':
+ import paddle
+ paddle.disable_static()
+    # a typical recognition input: one 3-channel image of height 32, width 320
+    x = paddle.zeros((1, 3, 32, 320))
+ net = MobileNetV3(model_name='small', small_stride=[1, 2, 2, 2])
+ y = net(x)
+ print(y.shape)
diff --git a/ppocr/modeling/backbones/rec_resnet_fpn.py b/ppocr/modeling/backbones/rec_resnet_fpn.py
deleted file mode 100755
index 0a05b5def8b79943f045d9cc941835cddc82bfdb..0000000000000000000000000000000000000000
--- a/ppocr/modeling/backbones/rec_resnet_fpn.py
+++ /dev/null
@@ -1,246 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-
-import paddle
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
-
-__all__ = [
- "ResNet", "ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet152"
-]
-
-Trainable = True
-w_nolr = fluid.ParamAttr(trainable=Trainable)
-train_parameters = {
- "input_size": [3, 224, 224],
- "input_mean": [0.485, 0.456, 0.406],
- "input_std": [0.229, 0.224, 0.225],
- "learning_strategy": {
- "name": "piecewise_decay",
- "batch_size": 256,
- "epochs": [30, 60, 90],
- "steps": [0.1, 0.01, 0.001, 0.0001]
- }
-}
-
-
-class ResNet():
- def __init__(self, params):
- self.layers = params['layers']
- self.params = train_parameters
-
- def __call__(self, input):
- layers = self.layers
- supported_layers = [18, 34, 50, 101, 152]
- assert layers in supported_layers, \
- "supported layers are {} but input layer is {}".format(supported_layers, layers)
-
- if layers == 18:
- depth = [2, 2, 2, 2]
- elif layers == 34 or layers == 50:
- depth = [3, 4, 6, 3]
- elif layers == 101:
- depth = [3, 4, 23, 3]
- elif layers == 152:
- depth = [3, 8, 36, 3]
- stride_list = [(2, 2), (2, 2), (1, 1), (1, 1)]
- num_filters = [64, 128, 256, 512]
-
- conv = self.conv_bn_layer(
- input=input,
- num_filters=64,
- filter_size=7,
- stride=2,
- act='relu',
- name="conv1")
- F = []
- if layers >= 50:
- for block in range(len(depth)):
- for i in range(depth[block]):
- if layers in [101, 152] and block == 2:
- if i == 0:
- conv_name = "res" + str(block + 2) + "a"
- else:
- conv_name = "res" + str(block + 2) + "b" + str(i)
- else:
- conv_name = "res" + str(block + 2) + chr(97 + i)
- conv = self.bottleneck_block(
- input=conv,
- num_filters=num_filters[block],
- stride=stride_list[block] if i == 0 else 1,
- name=conv_name)
- F.append(conv)
- else:
- for block in range(len(depth)):
- for i in range(depth[block]):
- conv_name = "res" + str(block + 2) + chr(97 + i)
-
- if i == 0 and block != 0:
- stride = (2, 1)
- else:
- stride = (1, 1)
-
- conv = self.basic_block(
- input=conv,
- num_filters=num_filters[block],
- stride=stride,
- if_first=block == i == 0,
- name=conv_name)
- F.append(conv)
-
- base = F[-1]
- for i in [-2, -3]:
- b, c, w, h = F[i].shape
- if (w, h) == base.shape[2:]:
- base = base
- else:
- base = fluid.layers.conv2d_transpose(
- input=base,
- num_filters=c,
- filter_size=4,
- stride=2,
- padding=1,
- act=None,
- param_attr=w_nolr,
- bias_attr=w_nolr)
- base = fluid.layers.batch_norm(
- base, act="relu", param_attr=w_nolr, bias_attr=w_nolr)
- base = fluid.layers.concat([base, F[i]], axis=1)
- base = fluid.layers.conv2d(
- base,
- num_filters=c,
- filter_size=1,
- param_attr=w_nolr,
- bias_attr=w_nolr)
- base = fluid.layers.conv2d(
- base,
- num_filters=c,
- filter_size=3,
- padding=1,
- param_attr=w_nolr,
- bias_attr=w_nolr)
- base = fluid.layers.batch_norm(
- base, act="relu", param_attr=w_nolr, bias_attr=w_nolr)
-
- base = fluid.layers.conv2d(
- base,
- num_filters=512,
- filter_size=1,
- bias_attr=w_nolr,
- param_attr=w_nolr)
-
- return base
-
- def conv_bn_layer(self,
- input,
- num_filters,
- filter_size,
- stride=1,
- groups=1,
- act=None,
- name=None):
- conv = fluid.layers.conv2d(
- input=input,
- num_filters=num_filters,
- filter_size=2 if stride == (1, 1) else filter_size,
- dilation=2 if stride == (1, 1) else 1,
- stride=stride,
- padding=(filter_size - 1) // 2,
- groups=groups,
- act=None,
- param_attr=ParamAttr(
- name=name + "_weights", trainable=Trainable),
- bias_attr=False,
- name=name + '.conv2d.output.1')
-
- if name == "conv1":
- bn_name = "bn_" + name
- else:
- bn_name = "bn" + name[3:]
- return fluid.layers.batch_norm(
- input=conv,
- act=act,
- name=bn_name + '.output.1',
- param_attr=ParamAttr(
- name=bn_name + '_scale', trainable=Trainable),
- bias_attr=ParamAttr(
- bn_name + '_offset', trainable=Trainable),
- moving_mean_name=bn_name + '_mean',
- moving_variance_name=bn_name + '_variance', )
-
- def shortcut(self, input, ch_out, stride, is_first, name):
- ch_in = input.shape[1]
- if ch_in != ch_out or stride != 1 or is_first == True:
- if stride == (1, 1):
- return self.conv_bn_layer(input, ch_out, 1, 1, name=name)
- else: #stride == (2,2)
- return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
-
- else:
- return input
-
- def bottleneck_block(self, input, num_filters, stride, name):
- conv0 = self.conv_bn_layer(
- input=input,
- num_filters=num_filters,
- filter_size=1,
- act='relu',
- name=name + "_branch2a")
- conv1 = self.conv_bn_layer(
- input=conv0,
- num_filters=num_filters,
- filter_size=3,
- stride=stride,
- act='relu',
- name=name + "_branch2b")
- conv2 = self.conv_bn_layer(
- input=conv1,
- num_filters=num_filters * 4,
- filter_size=1,
- act=None,
- name=name + "_branch2c")
-
- short = self.shortcut(
- input,
- num_filters * 4,
- stride,
- is_first=False,
- name=name + "_branch1")
-
- return fluid.layers.elementwise_add(
- x=short, y=conv2, act='relu', name=name + ".add.output.5")
-
- def basic_block(self, input, num_filters, stride, is_first, name):
- conv0 = self.conv_bn_layer(
- input=input,
- num_filters=num_filters,
- filter_size=3,
- act='relu',
- stride=stride,
- name=name + "_branch2a")
- conv1 = self.conv_bn_layer(
- input=conv0,
- num_filters=num_filters,
- filter_size=3,
- act=None,
- name=name + "_branch2b")
- short = self.shortcut(
- input, num_filters, stride, is_first, name=name + "_branch1")
- return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
diff --git a/ppocr/modeling/backbones/rec_resnet_vd.py b/ppocr/modeling/backbones/rec_resnet_vd.py
old mode 100755
new mode 100644
index bc58c8ac13a108bc61e398aae8447b6fab966504..d8602498f391a3531fe4187c666a65f882a9d464
--- a/ppocr/modeling/backbones/rec_resnet_vd.py
+++ b/ppocr/modeling/backbones/rec_resnet_vd.py
@@ -1,271 +1,312 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-import math
+from paddle import nn, ParamAttr
+from paddle.nn import functional as F
-import paddle
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
+__all__ = ["ResNet"]
-__all__ = [
- "ResNet", "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd",
- "ResNet152_vd", "ResNet200_vd"
-]
+class ResNet(nn.Layer):
+ def __init__(self, in_channels=3, layers=34):
+ super(ResNet, self).__init__()
+ supported_layers = {
+ 18: {
+ 'depth': [2, 2, 2, 2],
+ 'block_class': BasicBlock
+ },
+ 34: {
+ 'depth': [3, 4, 6, 3],
+ 'block_class': BasicBlock
+ },
+ 50: {
+ 'depth': [3, 4, 6, 3],
+ 'block_class': BottleneckBlock
+ },
+ 101: {
+ 'depth': [3, 4, 23, 3],
+ 'block_class': BottleneckBlock
+ },
+ 152: {
+ 'depth': [3, 8, 36, 3],
+ 'block_class': BottleneckBlock
+ },
+ 200: {
+ 'depth': [3, 12, 48, 3],
+ 'block_class': BottleneckBlock
+ }
+ }
+ assert layers in supported_layers, \
+ "supported layers are {} but input layer is {}".format(supported_layers.keys(), layers)
+ is_3x3 = True
-class ResNet():
- def __init__(self, params):
- self.layers = params['layers']
- self.is_3x3 = True
- supported_layers = [18, 34, 50, 101, 152, 200]
- assert self.layers in supported_layers, \
- "supported layers are {} but input layer is {}".format(supported_layers, self.layers)
-
- def __call__(self, input):
- is_3x3 = self.is_3x3
- layers = self.layers
-
- if layers == 18:
- depth = [2, 2, 2, 2]
- elif layers == 34 or layers == 50:
- depth = [3, 4, 6, 3]
- elif layers == 101:
- depth = [3, 4, 23, 3]
- elif layers == 152:
- depth = [3, 8, 36, 3]
- elif layers == 200:
- depth = [3, 12, 48, 3]
num_filters = [64, 128, 256, 512]
+ depth = supported_layers[layers]['depth']
+ block_class = supported_layers[layers]['block_class']
+ conv = []
if is_3x3 == False:
- conv = self.conv_bn_layer(
- input=input,
- num_filters=64,
- filter_size=7,
- stride=1,
- act='relu')
+ conv.append(
+ ConvBNLayer(
+ in_channels=in_channels,
+ out_channels=64,
+ kernel_size=7,
+ stride=1,
+ act='relu'))
else:
- conv = self.conv_bn_layer(
- input=input,
- num_filters=32,
- filter_size=3,
- stride=1,
- act='relu',
- name='conv1_1')
- conv = self.conv_bn_layer(
- input=conv,
- num_filters=32,
- filter_size=3,
- stride=1,
- act='relu',
- name='conv1_2')
- conv = self.conv_bn_layer(
- input=conv,
- num_filters=64,
- filter_size=3,
- stride=1,
- act='relu',
- name='conv1_3')
+ conv.append(
+ ConvBNLayer(
+ in_channels=in_channels,
+ out_channels=32,
+ kernel_size=3,
+ stride=1,
+ act='relu',
+ name='conv1_1'))
+ conv.append(
+ ConvBNLayer(
+ in_channels=32,
+ out_channels=32,
+ kernel_size=3,
+ stride=1,
+ act='relu',
+ name='conv1_2'))
+ conv.append(
+ ConvBNLayer(
+ in_channels=32,
+ out_channels=64,
+ kernel_size=3,
+ stride=1,
+ act='relu',
+ name='conv1_3'))
+ self.conv1 = nn.Sequential(*conv)
- conv = fluid.layers.pool2d(
- input=conv,
- pool_size=3,
- pool_stride=2,
- pool_padding=1,
- pool_type='max')
+        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
- if layers >= 50:
- for block in range(len(depth)):
- for i in range(depth[block]):
- if layers in [101, 152, 200] and block == 2:
+ block_list = []
+ in_ch = 64
+ for block_index in range(len(depth)):
+ for i in range(depth[block_index]):
+ if layers >= 50:
+ if layers in [101, 152, 200] and block_index == 2:
if i == 0:
- conv_name = "res" + str(block + 2) + "a"
+ conv_name = "res" + str(block_index + 2) + "a"
else:
- conv_name = "res" + str(block + 2) + "b" + str(i)
- else:
- conv_name = "res" + str(block + 2) + chr(97 + i)
-
- if i == 0 and block != 0:
- stride = (2, 1)
- else:
- stride = (1, 1)
-
- conv = self.bottleneck_block(
- input=conv,
- num_filters=num_filters[block],
- stride=stride,
- if_first=block == i == 0,
- name=conv_name)
- else:
- for block in range(len(depth)):
- for i in range(depth[block]):
- conv_name = "res" + str(block + 2) + chr(97 + i)
-
- if i == 0 and block != 0:
- stride = (2, 1)
+ conv_name = "res" + str(block_index +
+ 2) + "b" + str(i)
else:
- stride = (1, 1)
-
- conv = self.basic_block(
- input=conv,
- num_filters=num_filters[block],
+ conv_name = "res" + str(block_index + 2) + chr(97 + i)
+ else:
+ conv_name = "res" + str(block_index + 2) + chr(97 + i)
+ if i == 0 and block_index != 0:
+ stride = (2, 1)
+ else:
+ stride = (1, 1)
+ block_list.append(
+ block_class(
+ in_channels=in_ch,
+ out_channels=num_filters[block_index],
stride=stride,
- if_first=block == i == 0,
- name=conv_name)
+ if_first=block_index == i == 0,
+ name=conv_name))
+ in_ch = block_list[-1].out_channels
+ self.block_list = nn.Sequential(*block_list)
+ self.add_sublayer(sublayer=self.block_list, name="block_list")
+ self.pool_out = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
+ self.out_channels = in_ch
- conv = fluid.layers.pool2d(
- input=conv,
- pool_size=2,
- pool_stride=2,
- pool_padding=0,
- pool_type='max')
+ def forward(self, x):
+ x = self.conv1(x)
+ x = self.pool(x)
+ x = self.block_list(x)
+ x = self.pool_out(x)
+ return x
- return conv
- def conv_bn_layer(self,
- input,
- num_filters,
- filter_size,
- stride=1,
- groups=1,
- act=None,
- name=None):
- conv = fluid.layers.conv2d(
- input=input,
- num_filters=num_filters,
- filter_size=filter_size,
+class ConvBNLayer(nn.Layer):
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=1,
+ groups=1,
+ act=None,
+ name=None):
+ super(ConvBNLayer, self).__init__()
+ self.conv = nn.Conv2d(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
stride=stride,
- padding=(filter_size - 1) // 2,
+ padding=(kernel_size - 1) // 2,
groups=groups,
- act=None,
- param_attr=ParamAttr(name=name + "_weights"),
+ weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
- return fluid.layers.batch_norm(
- input=conv,
+ self.bn = nn.BatchNorm(
+ num_channels=out_channels,
act=act,
- param_attr=ParamAttr(name=bn_name + '_scale'),
- bias_attr=ParamAttr(bn_name + '_offset'),
- moving_mean_name=bn_name + '_mean',
- moving_variance_name=bn_name + '_variance')
+ param_attr=ParamAttr(name=bn_name + "_scale"),
+ bias_attr=ParamAttr(name=bn_name + "_offset"),
+ moving_mean_name=bn_name + "_mean",
+ moving_variance_name=bn_name + "_variance")
+
+    def forward(self, x):
+ x = self.conv(x)
+ x = self.bn(x)
+ return x
- def conv_bn_layer_new(self,
- input,
- num_filters,
- filter_size,
- stride=1,
- groups=1,
- act=None,
- name=None):
- pool = fluid.layers.pool2d(
- input=input,
- pool_size=stride,
- pool_stride=stride,
- pool_padding=0,
- pool_type='avg',
- ceil_mode=True)
- conv = fluid.layers.conv2d(
- input=pool,
- num_filters=num_filters,
- filter_size=filter_size,
+class ConvBNLayerNew(nn.Layer):
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=1,
+ groups=1,
+ act=None,
+ name=None):
+ super(ConvBNLayerNew, self).__init__()
+ self.pool = nn.AvgPool2d(
+ kernel_size=stride, stride=stride, padding=0, ceil_mode=True)
+
+ self.conv = nn.Conv2d(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
stride=1,
- padding=(filter_size - 1) // 2,
+ padding=(kernel_size - 1) // 2,
groups=groups,
- act=None,
- param_attr=ParamAttr(name=name + "_weights"),
+ weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
-
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
- return fluid.layers.batch_norm(
- input=conv,
+ self.bn = nn.BatchNorm(
+ num_channels=out_channels,
act=act,
- param_attr=ParamAttr(name=bn_name + '_scale'),
- bias_attr=ParamAttr(bn_name + '_offset'),
- moving_mean_name=bn_name + '_mean',
- moving_variance_name=bn_name + '_variance')
+ param_attr=ParamAttr(name=bn_name + "_scale"),
+ bias_attr=ParamAttr(name=bn_name + "_offset"),
+ moving_mean_name=bn_name + "_mean",
+ moving_variance_name=bn_name + "_variance")
+
+    def forward(self, x):
+ x = self.pool(x)
+ x = self.conv(x)
+ x = self.bn(x)
+ return x
+
+
+class ShortCut(nn.Layer):
+ def __init__(self, in_channels, out_channels, stride, name, if_first=False):
+ super(ShortCut, self).__init__()
+ self.use_conv = True
- def shortcut(self, input, ch_out, stride, name, if_first=False):
- ch_in = input.shape[1]
- if ch_in != ch_out or stride[0] != 1:
+ if in_channels != out_channels or stride[0] != 1:
if if_first:
- return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
+ self.conv = ConvBNLayer(
+ in_channels, out_channels, 1, stride, name=name)
else:
- return self.conv_bn_layer_new(
- input, ch_out, 1, stride, name=name)
+ self.conv = ConvBNLayerNew(
+ in_channels, out_channels, 1, stride, name=name)
elif if_first:
- return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
+ self.conv = ConvBNLayer(
+ in_channels, out_channels, 1, stride, name=name)
else:
- return input
+ self.use_conv = False
- def bottleneck_block(self, input, num_filters, stride, name, if_first):
- conv0 = self.conv_bn_layer(
- input=input,
- num_filters=num_filters,
- filter_size=1,
+ def forward(self, x):
+ if self.use_conv:
+ x = self.conv(x)
+ return x
+
+
+class BottleneckBlock(nn.Layer):
+ def __init__(self, in_channels, out_channels, stride, name, if_first):
+ super(BottleneckBlock, self).__init__()
+ self.conv0 = ConvBNLayer(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ kernel_size=1,
act='relu',
name=name + "_branch2a")
- conv1 = self.conv_bn_layer(
- input=conv0,
- num_filters=num_filters,
- filter_size=3,
+ self.conv1 = ConvBNLayer(
+ in_channels=out_channels,
+ out_channels=out_channels,
+ kernel_size=3,
stride=stride,
act='relu',
name=name + "_branch2b")
- conv2 = self.conv_bn_layer(
- input=conv1,
- num_filters=num_filters * 4,
- filter_size=1,
+ self.conv2 = ConvBNLayer(
+ in_channels=out_channels,
+ out_channels=out_channels * 4,
+ kernel_size=1,
act=None,
name=name + "_branch2c")
- short = self.shortcut(
- input,
- num_filters * 4,
- stride,
+ self.short = ShortCut(
+ in_channels=in_channels,
+ out_channels=out_channels * 4,
+ stride=stride,
if_first=if_first,
name=name + "_branch1")
+ self.out_channels = out_channels * 4
- return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
+ def forward(self, x):
+ y = self.conv0(x)
+ y = self.conv1(y)
+ y = self.conv2(y)
+ y = y + self.short(x)
+ y = F.relu(y)
+ return y
- def basic_block(self, input, num_filters, stride, name, if_first):
- conv0 = self.conv_bn_layer(
- input=input,
- num_filters=num_filters,
- filter_size=3,
+
+class BasicBlock(nn.Layer):
+ def __init__(self, in_channels, out_channels, stride, name, if_first):
+ super(BasicBlock, self).__init__()
+ self.conv0 = ConvBNLayer(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ kernel_size=3,
act='relu',
stride=stride,
name=name + "_branch2a")
- conv1 = self.conv_bn_layer(
- input=conv0,
- num_filters=num_filters,
- filter_size=3,
+ self.conv1 = ConvBNLayer(
+ in_channels=out_channels,
+ out_channels=out_channels,
+ kernel_size=3,
act=None,
name=name + "_branch2b")
- short = self.shortcut(
- input,
- num_filters,
- stride,
+ self.short = ShortCut(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ stride=stride,
if_first=if_first,
name=name + "_branch1")
- return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
+ self.out_channels = out_channels
+
+ def forward(self, x):
+ y = self.conv0(x)
+ y = self.conv1(y)
+ y = y + self.short(x)
+ return F.relu(y)
diff --git a/ppocr/modeling/common_functions.py b/ppocr/modeling/common_functions.py
deleted file mode 100755
index 2ebcb0427a9a7e8d994f563981ad7fad32ad77b9..0000000000000000000000000000000000000000
--- a/ppocr/modeling/common_functions.py
+++ /dev/null
@@ -1,95 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
-import math
-
-
-def get_para_bias_attr(l2_decay, k, name):
- regularizer = fluid.regularizer.L2Decay(l2_decay)
- stdv = 1.0 / math.sqrt(k * 1.0)
- initializer = fluid.initializer.Uniform(-stdv, stdv)
- para_attr = fluid.ParamAttr(
- regularizer=regularizer, initializer=initializer, name=name + "_w_attr")
- bias_attr = fluid.ParamAttr(
- regularizer=regularizer, initializer=initializer, name=name + "_b_attr")
- return [para_attr, bias_attr]
-
-
-def conv_bn_layer(input,
- num_filters,
- filter_size,
- stride=1,
- groups=1,
- act=None,
- name=None):
- conv = fluid.layers.conv2d(
- input=input,
- num_filters=num_filters,
- filter_size=filter_size,
- stride=stride,
- padding=(filter_size - 1) // 2,
- groups=groups,
- act=None,
- param_attr=ParamAttr(name=name + "_weights"),
- bias_attr=False,
- name=name + '.conv2d')
-
- bn_name = "bn_" + name
- return fluid.layers.batch_norm(
- input=conv,
- act=act,
- name=bn_name + '.output',
- param_attr=ParamAttr(name=bn_name + '_scale'),
- bias_attr=ParamAttr(bn_name + '_offset'),
- moving_mean_name=bn_name + '_mean',
- moving_variance_name=bn_name + '_variance')
-
-
-def deconv_bn_layer(input,
- num_filters,
- filter_size=4,
- stride=2,
- act='relu',
- name=None):
- deconv = fluid.layers.conv2d_transpose(
- input=input,
- num_filters=num_filters,
- filter_size=filter_size,
- stride=stride,
- padding=1,
- act=None,
- param_attr=ParamAttr(name=name + "_weights"),
- bias_attr=False,
- name=name + '.deconv2d')
- bn_name = "bn_" + name
- return fluid.layers.batch_norm(
- input=deconv,
- act=act,
- name=bn_name + '.output',
- param_attr=ParamAttr(name=bn_name + '_scale'),
- bias_attr=ParamAttr(bn_name + '_offset'),
- moving_mean_name=bn_name + '_mean',
- moving_variance_name=bn_name + '_variance')
-
-
-def create_tmp_var(program, name, dtype, shape, lod_level=0):
- return program.current_block().create_var(
- name=name, dtype=dtype, shape=shape, lod_level=lod_level)
diff --git a/ppocr/modeling/heads/__init__.py b/ppocr/modeling/heads/__init__.py
index abf198b97e6e818e1fbe59006f98492640bcee54..bed7068d907ba30d7b4fce71e2fdccf7a692463f 100755
--- a/ppocr/modeling/heads/__init__.py
+++ b/ppocr/modeling/heads/__init__.py
@@ -11,3 +11,20 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+
+__all__ = ['build_head']
+
+
+def build_head(config):
+ # det head
+ from .det_db_head import DBHead
+
+ # rec head
+ from .rec_ctc_head import CTC
+ support_dict = ['DBHead', 'CTC']
+
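+    # config carries the head name plus its kwargs, e.g.
+    # build_head({'name': 'DBHead', 'in_channels': 256, 'k': 50})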
+ module_name = config.pop('name')
+    assert module_name in support_dict, \
+        'head only supports {}'.format(support_dict)
+ module_class = eval(module_name)(**config)
+ return module_class
diff --git a/ppocr/modeling/heads/det_db_head.py b/ppocr/modeling/heads/det_db_head.py
index 56998044d8923a2bbda094e01b5a4eb2f5496bb3..85149abd6f7d4f9c89c2ccfbba944c53e60093a0 100644
--- a/ppocr/modeling/heads/det_db_head.py
+++ b/ppocr/modeling/heads/det_db_head.py
@@ -1,205 +1,128 @@
-#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
-import paddle.fluid as fluid
+def get_bias_attr(k, name):
+ stdv = 1.0 / math.sqrt(k * 1.0)
+ initializer = paddle.nn.initializer.Uniform(-stdv, stdv)
+ bias_attr = ParamAttr(initializer=initializer, name=name + "_b_attr")
+ return bias_attr
-class DBHead(object):
- """
- Differentiable Binarization (DB) for text detection:
- see https://arxiv.org/abs/1911.08947
- args:
- params(dict): super parameters for build DB network
- """
-
- def __init__(self, params):
- self.k = params['k']
- self.inner_channels = params['inner_channels']
- self.C, self.H, self.W = params['image_shape']
- print(self.C, self.H, self.W)
- def binarize(self, x):
- conv1 = fluid.layers.conv2d(
- input=x,
- num_filters=self.inner_channels // 4,
- filter_size=3,
+class Head(nn.Layer):
+ def __init__(self, in_channels, name_list):
+ super(Head, self).__init__()
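+        # conv-bn followed by two stride-2 transposed convs: upsamples the fused
+        # feature 4x and ends in a single-channel sigmoid map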
+ self.conv1 = nn.Conv2d(
+ in_channels=in_channels,
+ out_channels=in_channels // 4,
+ kernel_size=3,
padding=1,
- param_attr=fluid.initializer.MSRAInitializer(uniform=False),
+ weight_attr=ParamAttr(name=name_list[0] + '.w_0'),
bias_attr=False)
- conv_bn1 = fluid.layers.batch_norm(
- input=conv1,
- param_attr=fluid.initializer.ConstantInitializer(value=1.0),
- bias_attr=fluid.initializer.ConstantInitializer(value=1e-4),
- act="relu")
- conv2 = fluid.layers.conv2d_transpose(
- input=conv_bn1,
- num_filters=self.inner_channels // 4,
- filter_size=2,
+ self.conv_bn1 = nn.BatchNorm(
+ num_channels=in_channels // 4,
+ param_attr=ParamAttr(
+ name=name_list[1] + '.w_0',
+ initializer=paddle.nn.initializer.Constant(value=1.0)),
+ bias_attr=ParamAttr(
+ name=name_list[1] + '.b_0',
+ initializer=paddle.nn.initializer.Constant(value=1e-4)),
+ moving_mean_name=name_list[1] + '.w_1',
+ moving_variance_name=name_list[1] + '.w_2',
+ act='relu')
+ self.conv2 = nn.ConvTranspose2d(
+ in_channels=in_channels // 4,
+ out_channels=in_channels // 4,
+ kernel_size=2,
stride=2,
- param_attr=fluid.initializer.MSRAInitializer(uniform=False),
- bias_attr=self._get_bias_attr(0.0004, conv_bn1.shape[1], "conv2"),
- act=None)
- conv_bn2 = fluid.layers.batch_norm(
- input=conv2,
- param_attr=fluid.initializer.ConstantInitializer(value=1.0),
- bias_attr=fluid.initializer.ConstantInitializer(value=1e-4),
+ weight_attr=ParamAttr(
+ name=name_list[2] + '.w_0',
+ initializer=paddle.nn.initializer.MSRA(uniform=False)),
+ bias_attr=get_bias_attr(in_channels // 4, name_list[-1] + "conv2"))
+ self.conv_bn2 = nn.BatchNorm(
+ num_channels=in_channels // 4,
+ param_attr=ParamAttr(
+ name=name_list[3] + '.w_0',
+ initializer=paddle.nn.initializer.Constant(value=1.0)),
+ bias_attr=ParamAttr(
+ name=name_list[3] + '.b_0',
+ initializer=paddle.nn.initializer.Constant(value=1e-4)),
+ moving_mean_name=name_list[3] + '.w_1',
+ moving_variance_name=name_list[3] + '.w_2',
act="relu")
- conv3 = fluid.layers.conv2d_transpose(
- input=conv_bn2,
- num_filters=1,
- filter_size=2,
+ self.conv3 = nn.ConvTranspose2d(
+ in_channels=in_channels // 4,
+ out_channels=1,
+ kernel_size=2,
stride=2,
- param_attr=fluid.initializer.MSRAInitializer(uniform=False),
- bias_attr=self._get_bias_attr(0.0004, conv_bn2.shape[1], "conv3"),
- act=None)
- out = fluid.layers.sigmoid(conv3)
- return out
+ weight_attr=ParamAttr(
+ name=name_list[4] + '.w_0',
+ initializer=paddle.nn.initializer.MSRA(uniform=False)),
+ bias_attr=get_bias_attr(in_channels // 4, name_list[-1] + "conv3"),
+ )
- def thresh(self, x):
- conv1 = fluid.layers.conv2d(
- input=x,
- num_filters=self.inner_channels // 4,
- filter_size=3,
- padding=1,
- param_attr=fluid.initializer.MSRAInitializer(uniform=False),
- bias_attr=False)
- conv_bn1 = fluid.layers.batch_norm(
- input=conv1,
- param_attr=fluid.initializer.ConstantInitializer(value=1.0),
- bias_attr=fluid.initializer.ConstantInitializer(value=1e-4),
- act="relu")
- conv2 = fluid.layers.conv2d_transpose(
- input=conv_bn1,
- num_filters=self.inner_channels // 4,
- filter_size=2,
- stride=2,
- param_attr=fluid.initializer.MSRAInitializer(uniform=False),
- bias_attr=self._get_bias_attr(0.0004, conv_bn1.shape[1], "conv2"),
- act=None)
- conv_bn2 = fluid.layers.batch_norm(
- input=conv2,
- param_attr=fluid.initializer.ConstantInitializer(value=1.0),
- bias_attr=fluid.initializer.ConstantInitializer(value=1e-4),
- act="relu")
- conv3 = fluid.layers.conv2d_transpose(
- input=conv_bn2,
- num_filters=1,
- filter_size=2,
- stride=2,
- param_attr=fluid.initializer.MSRAInitializer(uniform=False),
- bias_attr=self._get_bias_attr(0.0004, conv_bn2.shape[1], "conv3"),
- act=None)
- out = fluid.layers.sigmoid(conv3)
- return out
+ def forward(self, x):
+ x = self.conv1(x)
+ x = self.conv_bn1(x)
+ x = self.conv2(x)
+ x = self.conv_bn2(x)
+ x = self.conv3(x)
+ x = F.sigmoid(x)
+ return x
- def _get_bias_attr(self, l2_decay, k, name, gradient_clip=None):
- regularizer = fluid.regularizer.L2Decay(l2_decay)
- stdv = 1.0 / math.sqrt(k * 1.0)
- initializer = fluid.initializer.Uniform(-stdv, stdv)
- bias_attr = fluid.ParamAttr(
- regularizer=regularizer,
- initializer=initializer,
- name=name + "_b_attr")
- return bias_attr
- def step_function(self, x, y):
- return fluid.layers.reciprocal(1 + fluid.layers.exp(-self.k * (x - y)))
+class DBHead(nn.Layer):
+ """
+ Differentiable Binarization (DB) for text detection:
+ see https://arxiv.org/abs/1911.08947
+ args:
+ params(dict): super parameters for build DB network
+ """
- def __call__(self, conv_features, mode="train"):
- c2, c3, c4, c5 = conv_features
- param_attr = fluid.initializer.MSRAInitializer(uniform=False)
- in5 = fluid.layers.conv2d(
- input=c5,
- num_filters=self.inner_channels,
- filter_size=1,
- param_attr=param_attr,
- bias_attr=False)
- in4 = fluid.layers.conv2d(
- input=c4,
- num_filters=self.inner_channels,
- filter_size=1,
- param_attr=param_attr,
- bias_attr=False)
- in3 = fluid.layers.conv2d(
- input=c3,
- num_filters=self.inner_channels,
- filter_size=1,
- param_attr=param_attr,
- bias_attr=False)
- in2 = fluid.layers.conv2d(
- input=c2,
- num_filters=self.inner_channels,
- filter_size=1,
- param_attr=param_attr,
- bias_attr=False)
+ def __init__(self, in_channels, k=50, **kwargs):
+ super(DBHead, self).__init__()
+ self.k = k
+ binarize_name_list = [
+ 'conv2d_56', 'batch_norm_47', 'conv2d_transpose_0', 'batch_norm_48',
+ 'conv2d_transpose_1', 'binarize'
+ ]
+ thresh_name_list = [
+ 'conv2d_57', 'batch_norm_49', 'conv2d_transpose_2', 'batch_norm_50',
+ 'conv2d_transpose_3', 'thresh'
+ ]
+ self.binarize = Head(in_channels, binarize_name_list)
+ self.thresh = Head(in_channels, thresh_name_list)
- out4 = fluid.layers.elementwise_add(
- x=fluid.layers.resize_nearest(
- input=in5, scale=2), y=in4) # 1/16
- out3 = fluid.layers.elementwise_add(
- x=fluid.layers.resize_nearest(
- input=out4, scale=2), y=in3) # 1/8
- out2 = fluid.layers.elementwise_add(
- x=fluid.layers.resize_nearest(
- input=out3, scale=2), y=in2) # 1/4
+ def step_function(self, x, y):
+ return paddle.reciprocal(1 + paddle.exp(-self.k * (x - y)))
- p5 = fluid.layers.conv2d(
- input=in5,
- num_filters=self.inner_channels // 4,
- filter_size=3,
- padding=1,
- param_attr=param_attr,
- bias_attr=False)
- p5 = fluid.layers.resize_nearest(input=p5, scale=8)
- p4 = fluid.layers.conv2d(
- input=out4,
- num_filters=self.inner_channels // 4,
- filter_size=3,
- padding=1,
- param_attr=param_attr,
- bias_attr=False)
- p4 = fluid.layers.resize_nearest(input=p4, scale=4)
- p3 = fluid.layers.conv2d(
- input=out3,
- num_filters=self.inner_channels // 4,
- filter_size=3,
- padding=1,
- param_attr=param_attr,
- bias_attr=False)
- p3 = fluid.layers.resize_nearest(input=p3, scale=2)
- p2 = fluid.layers.conv2d(
- input=out2,
- num_filters=self.inner_channels // 4,
- filter_size=3,
- padding=1,
- param_attr=param_attr,
- bias_attr=False)
+ def forward(self, x):
+ shrink_maps = self.binarize(x)
+ if not self.training:
+ return shrink_maps
- fuse = fluid.layers.concat(input=[p5, p4, p3, p2], axis=1)
- shrink_maps = self.binarize(fuse)
- if mode != "train":
- return {"maps": shrink_maps}
- threshold_maps = self.thresh(fuse)
+ threshold_maps = self.thresh(x)
binary_maps = self.step_function(shrink_maps, threshold_maps)
- y = fluid.layers.concat(
- input=[shrink_maps, threshold_maps, binary_maps], axis=1)
- predicts = {}
- predicts['maps'] = y
- return predicts
+ y = paddle.concat([shrink_maps, threshold_maps, binary_maps], axis=1)
+ return y
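
# A minimal usage sketch of the migrated DBHead above; shapes assume a fused
# FPN feature at 1/4 of a 640x640 input and the paddle 2.0 dygraph APIs used
# in this patch (values here are illustrative, not taken from the hunk).
import paddle

head = DBHead(in_channels=96)          # k defaults to 50, as in the DB paper
x = paddle.rand([1, 96, 160, 160])
head.train()
maps = head(x)                         # [1, 3, 640, 640]: shrink, threshold, binary
head.eval()
shrink_only = head(x)                  # [1, 1, 640, 640]: inference returns shrink map
# step_function(P, T) = 1 / (1 + exp(-k * (P - T))) is a steep, differentiable
# approximation of the hard threshold P > T.
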
diff --git a/ppocr/modeling/heads/det_east_head.py b/ppocr/modeling/heads/det_east_head.py
deleted file mode 100755
index de6ed51db72aee297e5cec7b89b49b9a0f55cd30..0000000000000000000000000000000000000000
--- a/ppocr/modeling/heads/det_east_head.py
+++ /dev/null
@@ -1,117 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle.fluid as fluid
-from ..common_functions import conv_bn_layer, deconv_bn_layer
-from collections import OrderedDict
-
-
-class EASTHead(object):
- """
- EAST: An Efficient and Accurate Scene Text Detector
- see arxiv: https://arxiv.org/abs/1704.03155
- args:
-        params(dict): the hyper-parameters used to build the network
- """
-
- def __init__(self, params):
-
- self.model_name = params['model_name']
-
- def unet_fusion(self, inputs):
- f = inputs[::-1]
- if self.model_name == "large":
- num_outputs = [128, 128, 128, 128]
- else:
- num_outputs = [64, 64, 64, 64]
- g = [None, None, None, None]
- h = [None, None, None, None]
- for i in range(4):
- if i == 0:
- h[i] = f[i]
- else:
- h[i] = fluid.layers.concat([g[i - 1], f[i]], axis=1)
- h[i] = conv_bn_layer(
- input=h[i],
- num_filters=num_outputs[i],
- filter_size=3,
- stride=1,
- act='relu',
- name="unet_h_%d" % (i))
- if i <= 2:
- #can be replaced with unpool
- g[i] = deconv_bn_layer(
- input=h[i],
- num_filters=num_outputs[i],
- name="unet_g_%d" % (i))
- else:
- g[i] = conv_bn_layer(
- input=h[i],
- num_filters=num_outputs[i],
- filter_size=3,
- stride=1,
- act='relu',
- name="unet_g_%d" % (i))
- return g[3]
-
- def detector_header(self, f_common):
- if self.model_name == "large":
- num_outputs = [128, 64, 1, 8]
- else:
- num_outputs = [64, 32, 1, 8]
- f_det = conv_bn_layer(
- input=f_common,
- num_filters=num_outputs[0],
- filter_size=3,
- stride=1,
- act='relu',
- name="det_head1")
- f_det = conv_bn_layer(
- input=f_det,
- num_filters=num_outputs[1],
- filter_size=3,
- stride=1,
- act='relu',
- name="det_head2")
- #f_score
- f_score = conv_bn_layer(
- input=f_det,
- num_filters=num_outputs[2],
- filter_size=1,
- stride=1,
- act=None,
- name="f_score")
- f_score = fluid.layers.sigmoid(f_score)
- #f_geo
- f_geo = conv_bn_layer(
- input=f_det,
- num_filters=num_outputs[3],
- filter_size=1,
- stride=1,
- act=None,
- name="f_geo")
- f_geo = (fluid.layers.sigmoid(f_geo) - 0.5) * 2 * 800
- return f_score, f_geo
-
- def __call__(self, inputs):
- f_common = self.unet_fusion(inputs)
- f_score, f_geo = self.detector_header(f_common)
- predicts = OrderedDict()
- predicts['f_score'] = f_score
- predicts['f_geo'] = f_geo
- return predicts
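
# A numpy sketch of what the deleted EAST head's geometry activation computed:
# raw map values are squashed into (-800, 800) pixel offsets via
# (sigmoid(x) - 0.5) * 2 * 800, matching the f_geo line above.
import numpy as np

def decode_geo(logits, scope=800.0):
    return (1.0 / (1.0 + np.exp(-logits)) - 0.5) * 2.0 * scope

print(decode_geo(np.array([-10.0, 0.0, 10.0])))  # ~[-800.  0.  800.]
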
diff --git a/ppocr/modeling/heads/det_sast_head.py b/ppocr/modeling/heads/det_sast_head.py
deleted file mode 100644
index 0097913dd7e08c76c45064940416e7c9ffc32f26..0000000000000000000000000000000000000000
--- a/ppocr/modeling/heads/det_sast_head.py
+++ /dev/null
@@ -1,228 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle.fluid as fluid
-from ..common_functions import conv_bn_layer, deconv_bn_layer
-from collections import OrderedDict
-
-
-class SASTHead(object):
- """
-    SAST: a Single-Shot Arbitrarily-Shaped Text detector,
- see arxiv: https://arxiv.org/abs/1908.05498
- args:
-        params(dict): the hyper-parameters used to build the network
- """
-
- def __init__(self, params):
- self.model_name = params['model_name']
- self.with_cab = params['with_cab']
-
- def FPN_Up_Fusion(self, blocks):
- """
-        blocks{}: contains block_2, block_3, block_4, block_5, block_6, block_7 with
- 1/4, 1/8, 1/16, 1/32, 1/64, 1/128 resolution.
- """
- f = [blocks['block_6'], blocks['block_5'], blocks['block_4'], blocks['block_3'], blocks['block_2']]
- num_outputs = [256, 256, 192, 192, 128]
- g = [None, None, None, None, None]
- h = [None, None, None, None, None]
- for i in range(5):
- h[i] = conv_bn_layer(input=f[i], num_filters=num_outputs[i],
- filter_size=1, stride=1, act=None, name='fpn_up_h'+str(i))
-
- for i in range(4):
- if i == 0:
- g[i] = deconv_bn_layer(input=h[i], num_filters=num_outputs[i + 1], act=None, name='fpn_up_g0')
- #print("g[{}] shape: {}".format(i, g[i].shape))
- else:
- g[i] = fluid.layers.elementwise_add(x=g[i - 1], y=h[i])
- g[i] = fluid.layers.relu(g[i])
- #g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i],
- # filter_size=1, stride=1, act='relu')
- g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i],
- filter_size=3, stride=1, act='relu', name='fpn_up_g%d_1'%i)
- g[i] = deconv_bn_layer(input=g[i], num_filters=num_outputs[i + 1], act=None, name='fpn_up_g%d_2'%i)
- #print("g[{}] shape: {}".format(i, g[i].shape))
-
- g[4] = fluid.layers.elementwise_add(x=g[3], y=h[4])
- g[4] = fluid.layers.relu(g[4])
- g[4] = conv_bn_layer(input=g[4], num_filters=num_outputs[4],
- filter_size=3, stride=1, act='relu', name='fpn_up_fusion_1')
- g[4] = conv_bn_layer(input=g[4], num_filters=num_outputs[4],
- filter_size=1, stride=1, act=None, name='fpn_up_fusion_2')
-
- return g[4]
-
- def FPN_Down_Fusion(self, blocks):
- """
-        blocks{}: contains block_2, block_3, block_4, block_5, block_6, block_7 with
- 1/4, 1/8, 1/16, 1/32, 1/64, 1/128 resolution.
- """
- f = [blocks['block_0'], blocks['block_1'], blocks['block_2']]
- num_outputs = [32, 64, 128]
- g = [None, None, None]
- h = [None, None, None]
- for i in range(3):
- h[i] = conv_bn_layer(input=f[i], num_filters=num_outputs[i],
- filter_size=3, stride=1, act=None, name='fpn_down_h'+str(i))
- for i in range(2):
- if i == 0:
- g[i] = conv_bn_layer(input=h[i], num_filters=num_outputs[i+1], filter_size=3, stride=2, act=None, name='fpn_down_g0')
- else:
- g[i] = fluid.layers.elementwise_add(x=g[i - 1], y=h[i])
- g[i] = fluid.layers.relu(g[i])
- g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i], filter_size=3, stride=1, act='relu', name='fpn_down_g%d_1'%i)
- g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i+1], filter_size=3, stride=2, act=None, name='fpn_down_g%d_2'%i)
- # print("g[{}] shape: {}".format(i, g[i].shape))
- g[2] = fluid.layers.elementwise_add(x=g[1], y=h[2])
- g[2] = fluid.layers.relu(g[2])
- g[2] = conv_bn_layer(input=g[2], num_filters=num_outputs[2],
- filter_size=3, stride=1, act='relu', name='fpn_down_fusion_1')
- g[2] = conv_bn_layer(input=g[2], num_filters=num_outputs[2],
- filter_size=1, stride=1, act=None, name='fpn_down_fusion_2')
- return g[2]
-
- def SAST_Header1(self, f_common):
- """Detector header."""
- #f_score
- f_score = conv_bn_layer(input=f_common, num_filters=64, filter_size=1, stride=1, act='relu', name='f_score1')
- f_score = conv_bn_layer(input=f_score, num_filters=64, filter_size=3, stride=1, act='relu', name='f_score2')
- f_score = conv_bn_layer(input=f_score, num_filters=128, filter_size=1, stride=1, act='relu', name='f_score3')
- f_score = conv_bn_layer(input=f_score, num_filters=1, filter_size=3, stride=1, name='f_score4')
- f_score = fluid.layers.sigmoid(f_score)
- # print("f_score shape: {}".format(f_score.shape))
-
-    #f_border
- f_border = conv_bn_layer(input=f_common, num_filters=64, filter_size=1, stride=1, act='relu', name='f_border1')
- f_border = conv_bn_layer(input=f_border, num_filters=64, filter_size=3, stride=1, act='relu', name='f_border2')
- f_border = conv_bn_layer(input=f_border, num_filters=128, filter_size=1, stride=1, act='relu', name='f_border3')
- f_border = conv_bn_layer(input=f_border, num_filters=4, filter_size=3, stride=1, name='f_border4')
- # print("f_border shape: {}".format(f_border.shape))
-
- return f_score, f_border
-
- def SAST_Header2(self, f_common):
- """Detector header."""
- #f_tvo
- f_tvo = conv_bn_layer(input=f_common, num_filters=64, filter_size=1, stride=1, act='relu', name='f_tvo1')
- f_tvo = conv_bn_layer(input=f_tvo, num_filters=64, filter_size=3, stride=1, act='relu', name='f_tvo2')
- f_tvo = conv_bn_layer(input=f_tvo, num_filters=128, filter_size=1, stride=1, act='relu', name='f_tvo3')
- f_tvo = conv_bn_layer(input=f_tvo, num_filters=8, filter_size=3, stride=1, name='f_tvo4')
- # print("f_tvo shape: {}".format(f_tvo.shape))
-
- #f_tco
- f_tco = conv_bn_layer(input=f_common, num_filters=64, filter_size=1, stride=1, act='relu', name='f_tco1')
- f_tco = conv_bn_layer(input=f_tco, num_filters=64, filter_size=3, stride=1, act='relu', name='f_tco2')
- f_tco = conv_bn_layer(input=f_tco, num_filters=128, filter_size=1, stride=1, act='relu', name='f_tco3')
- f_tco = conv_bn_layer(input=f_tco, num_filters=2, filter_size=3, stride=1, name='f_tco4')
- # print("f_tco shape: {}".format(f_tco.shape))
-
- return f_tvo, f_tco
-
- def cross_attention(self, f_common):
- """
- """
- f_shape = fluid.layers.shape(f_common)
- f_theta = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1, act='relu', name='f_theta')
- f_phi = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1, act='relu', name='f_phi')
- f_g = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1, act='relu', name='f_g')
- ### horizon
- fh_theta = f_theta
- fh_phi = f_phi
- fh_g = f_g
- #flatten
- fh_theta = fluid.layers.transpose(fh_theta, [0, 2, 3, 1])
- fh_theta = fluid.layers.reshape(fh_theta, [f_shape[0] * f_shape[2], f_shape[3], 128])
- fh_phi = fluid.layers.transpose(fh_phi, [0, 2, 3, 1])
- fh_phi = fluid.layers.reshape(fh_phi, [f_shape[0] * f_shape[2], f_shape[3], 128])
- fh_g = fluid.layers.transpose(fh_g, [0, 2, 3, 1])
- fh_g = fluid.layers.reshape(fh_g, [f_shape[0] * f_shape[2], f_shape[3], 128])
- #correlation
- fh_attn = fluid.layers.matmul(fh_theta, fluid.layers.transpose(fh_phi, [0, 2, 1]))
- #scale
- fh_attn = fh_attn / (128 ** 0.5)
- fh_attn = fluid.layers.softmax(fh_attn)
- #weighted sum
- fh_weight = fluid.layers.matmul(fh_attn, fh_g)
- fh_weight = fluid.layers.reshape(fh_weight, [f_shape[0], f_shape[2], f_shape[3], 128])
- # print("fh_weight: {}".format(fh_weight.shape))
- fh_weight = fluid.layers.transpose(fh_weight, [0, 3, 1, 2])
- fh_weight = conv_bn_layer(input=fh_weight, num_filters=128, filter_size=1, stride=1, name='fh_weight')
- #short cut
- fh_sc = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1, name='fh_sc')
- f_h = fluid.layers.relu(fh_weight + fh_sc)
- ######
- #vertical
- fv_theta = fluid.layers.transpose(f_theta, [0, 1, 3, 2])
- fv_phi = fluid.layers.transpose(f_phi, [0, 1, 3, 2])
- fv_g = fluid.layers.transpose(f_g, [0, 1, 3, 2])
- #flatten
- fv_theta = fluid.layers.transpose(fv_theta, [0, 2, 3, 1])
- fv_theta = fluid.layers.reshape(fv_theta, [f_shape[0] * f_shape[3], f_shape[2], 128])
- fv_phi = fluid.layers.transpose(fv_phi, [0, 2, 3, 1])
- fv_phi = fluid.layers.reshape(fv_phi, [f_shape[0] * f_shape[3], f_shape[2], 128])
- fv_g = fluid.layers.transpose(fv_g, [0, 2, 3, 1])
- fv_g = fluid.layers.reshape(fv_g, [f_shape[0] * f_shape[3], f_shape[2], 128])
- #correlation
- fv_attn = fluid.layers.matmul(fv_theta, fluid.layers.transpose(fv_phi, [0, 2, 1]))
- #scale
- fv_attn = fv_attn / (128 ** 0.5)
- fv_attn = fluid.layers.softmax(fv_attn)
- #weighted sum
- fv_weight = fluid.layers.matmul(fv_attn, fv_g)
- fv_weight = fluid.layers.reshape(fv_weight, [f_shape[0], f_shape[3], f_shape[2], 128])
- # print("fv_weight: {}".format(fv_weight.shape))
- fv_weight = fluid.layers.transpose(fv_weight, [0, 3, 2, 1])
- fv_weight = conv_bn_layer(input=fv_weight, num_filters=128, filter_size=1, stride=1, name='fv_weight')
- #short cut
- fv_sc = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1, name='fv_sc')
- f_v = fluid.layers.relu(fv_weight + fv_sc)
- ######
- f_attn = fluid.layers.concat([f_h, f_v], axis=1)
- f_attn = conv_bn_layer(input=f_attn, num_filters=128, filter_size=1, stride=1, act='relu', name='f_attn')
- return f_attn
-
- def __call__(self, blocks, with_cab=False):
- # for k, v in blocks.items():
- # print(k, v.shape)
-
- #down fpn
- f_down = self.FPN_Down_Fusion(blocks)
- # print("f_down shape: {}".format(f_down.shape))
- #up fpn
- f_up = self.FPN_Up_Fusion(blocks)
- # print("f_up shape: {}".format(f_up.shape))
- #fusion
- f_common = fluid.layers.elementwise_add(x=f_down, y=f_up)
- f_common = fluid.layers.relu(f_common)
- # print("f_common: {}".format(f_common.shape))
-
- if self.with_cab:
-            # print('enhance f_common with CAB.')
- f_common = self.cross_attention(f_common)
-
- f_score, f_border= self.SAST_Header1(f_common)
- f_tvo, f_tco = self.SAST_Header2(f_common)
-
- predicts = OrderedDict()
- predicts['f_score'] = f_score
- predicts['f_border'] = f_border
- predicts['f_tvo'] = f_tvo
- predicts['f_tco'] = f_tco
- return predicts
\ No newline at end of file
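
# A numpy sketch of the row-wise branch of the deleted cross-attention block:
# each row of the 128-channel feature map attends over its W positions with
# scaled dot-product attention (scale 1/sqrt(128), as in the code above).
import numpy as np

def row_attention(theta, phi, g):            # each [W, 128]: one (batch, row) slice
    attn = theta @ phi.T / np.sqrt(128.0)    # [W, W] correlation, scaled
    attn = np.exp(attn - attn.max(axis=1, keepdims=True))
    attn /= attn.sum(axis=1, keepdims=True)  # softmax over row positions
    return attn @ g                          # [W, 128] weighted sum

theta = phi = g = np.random.rand(5, 128)     # W = 5 positions in one row
out = row_attention(theta, phi, g)           # the vertical branch is the transpose
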
diff --git a/ppocr/modeling/heads/rec_attention_head.py b/ppocr/modeling/heads/rec_attention_head.py
deleted file mode 100755
index 66c8f3000fe576dd00d6f877c9897feda754cf98..0000000000000000000000000000000000000000
--- a/ppocr/modeling/heads/rec_attention_head.py
+++ /dev/null
@@ -1,237 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-
-import paddle
-import paddle.fluid as fluid
-import paddle.fluid.layers as layers
-from .rec_seq_encoder import SequenceEncoder
-import numpy as np
-
-
-class AttentionPredict(object):
- def __init__(self, params):
- super(AttentionPredict, self).__init__()
- self.char_num = params['char_num']
- self.encoder = SequenceEncoder(params)
- self.decoder_size = params['Attention']['decoder_size']
- self.word_vector_dim = params['Attention']['word_vector_dim']
- self.encoder_type = params['encoder_type']
- self.max_length = params['max_text_length']
-
- def simple_attention(self, encoder_vec, encoder_proj, decoder_state,
- decoder_size):
- decoder_state_proj = layers.fc(input=decoder_state,
- size=decoder_size,
- bias_attr=False,
- name="decoder_state_proj_fc")
- decoder_state_expand = layers.sequence_expand(
- x=decoder_state_proj, y=encoder_proj)
- concated = layers.elementwise_add(encoder_proj, decoder_state_expand)
- concated = layers.tanh(x=concated)
- attention_weights = layers.fc(input=concated,
- size=1,
- act=None,
- bias_attr=False,
- name="attention_weights_fc")
- attention_weights = layers.sequence_softmax(input=attention_weights)
-        weights_reshape = layers.reshape(x=attention_weights, shape=[-1])
-        scaled = layers.elementwise_mul(
-            x=encoder_vec, y=weights_reshape, axis=0)
- context = layers.sequence_pool(input=scaled, pool_type='sum')
- return context
-
- def gru_decoder_with_attention(self, target_embedding, encoder_vec,
- encoder_proj, decoder_boot, decoder_size,
- char_num):
- rnn = layers.DynamicRNN()
- with rnn.block():
- current_word = rnn.step_input(target_embedding)
- encoder_vec = rnn.static_input(encoder_vec)
- encoder_proj = rnn.static_input(encoder_proj)
- hidden_mem = rnn.memory(init=decoder_boot, need_reorder=True)
- context = self.simple_attention(encoder_vec, encoder_proj,
- hidden_mem, decoder_size)
- fc_1 = layers.fc(input=context,
- size=decoder_size * 3,
- bias_attr=False,
- name="rnn_fc1")
- fc_2 = layers.fc(input=current_word,
- size=decoder_size * 3,
- bias_attr=False,
- name="rnn_fc2")
- decoder_inputs = fc_1 + fc_2
- h, _, _ = layers.gru_unit(
- input=decoder_inputs, hidden=hidden_mem, size=decoder_size * 3)
- rnn.update_memory(hidden_mem, h)
- out = layers.fc(input=h,
- size=char_num,
- bias_attr=True,
- act='softmax',
- name="rnn_out_fc")
- rnn.output(out)
- return rnn()
-
- def gru_attention_infer(self, decoder_boot, max_length, char_num,
- word_vector_dim, encoded_vector, encoded_proj,
- decoder_size):
- init_state = decoder_boot
- beam_size = 1
- array_len = layers.fill_constant(
- shape=[1], dtype='int64', value=max_length)
- counter = layers.zeros(shape=[1], dtype='int64', force_cpu=True)
-
- # fill the first element with init_state
- state_array = layers.create_array('float32')
- layers.array_write(init_state, array=state_array, i=counter)
-
- # ids, scores as memory
- ids_array = layers.create_array('int64')
- scores_array = layers.create_array('float32')
- rois_shape = layers.shape(init_state)
- batch_size = layers.slice(
- rois_shape, axes=[0], starts=[0], ends=[1]) + 1
- lod_level = layers.range(
- start=0, end=batch_size, step=1, dtype=batch_size.dtype)
-
- init_ids = layers.fill_constant_batch_size_like(
- input=init_state, shape=[-1, 1], value=0, dtype='int64')
- init_ids = layers.lod_reset(init_ids, lod_level)
- init_ids = layers.lod_append(init_ids, lod_level)
-
- init_scores = layers.fill_constant_batch_size_like(
- input=init_state, shape=[-1, 1], value=1, dtype='float32')
- init_scores = layers.lod_reset(init_scores, init_ids)
- layers.array_write(init_ids, array=ids_array, i=counter)
- layers.array_write(init_scores, array=scores_array, i=counter)
-
- full_ids = fluid.layers.fill_constant_batch_size_like(
- input=init_state, shape=[-1, 1], dtype='int64', value=1)
- full_scores = fluid.layers.fill_constant_batch_size_like(
- input=init_state, shape=[-1, 1], dtype='float32', value=1)
-
- cond = layers.less_than(x=counter, y=array_len)
- while_op = layers.While(cond=cond)
- with while_op.block():
- pre_ids = layers.array_read(array=ids_array, i=counter)
- pre_state = layers.array_read(array=state_array, i=counter)
- pre_score = layers.array_read(array=scores_array, i=counter)
- pre_ids_emb = layers.embedding(
- input=pre_ids,
- size=[char_num, word_vector_dim],
- dtype='float32')
-
- context = self.simple_attention(encoded_vector, encoded_proj,
- pre_state, decoder_size)
-
- # expand the recursive_sequence_lengths of pre_state
- # to be the same with pre_score
- pre_state_expanded = layers.sequence_expand(pre_state, pre_score)
- context_expanded = layers.sequence_expand(context, pre_score)
-
- fc_1 = layers.fc(input=context_expanded,
- size=decoder_size * 3,
- bias_attr=False,
- name="rnn_fc1")
-
- fc_2 = layers.fc(input=pre_ids_emb,
- size=decoder_size * 3,
- bias_attr=False,
- name="rnn_fc2")
-
- decoder_inputs = fc_1 + fc_2
- current_state, _, _ = layers.gru_unit(
- input=decoder_inputs,
- hidden=pre_state_expanded,
- size=decoder_size * 3)
- current_state_with_lod = layers.lod_reset(
- x=current_state, y=pre_score)
- # use score to do beam search
- current_score = layers.fc(input=current_state_with_lod,
- size=char_num,
- bias_attr=True,
- act='softmax',
- name="rnn_out_fc")
- topk_scores, topk_indices = layers.topk(current_score, k=beam_size)
-
- new_ids = fluid.layers.concat([full_ids, topk_indices], axis=1)
- fluid.layers.assign(new_ids, full_ids)
-
- new_scores = fluid.layers.concat([full_scores, topk_scores], axis=1)
- fluid.layers.assign(new_scores, full_scores)
-
- layers.increment(x=counter, value=1, in_place=True)
-
- # update the memories
- layers.array_write(current_state, array=state_array, i=counter)
- layers.array_write(topk_indices, array=ids_array, i=counter)
- layers.array_write(topk_scores, array=scores_array, i=counter)
-
- # update the break condition:
- # up to the max length or all candidates of
- # source sentences have ended.
- length_cond = layers.less_than(x=counter, y=array_len)
- finish_cond = layers.logical_not(layers.is_empty(x=topk_indices))
- layers.logical_and(x=length_cond, y=finish_cond, out=cond)
- return full_ids, full_scores
-
- def __call__(self, inputs, labels=None, mode=None):
- encoder_features = self.encoder(inputs)
- char_num = self.char_num
- word_vector_dim = self.word_vector_dim
- decoder_size = self.decoder_size
-
- if self.encoder_type == "reshape":
- encoder_input = encoder_features
- encoded_vector = encoder_features
- else:
- encoder_input = encoder_features[1]
- encoded_vector = layers.concat(encoder_features, axis=1)
- encoded_proj = layers.fc(input=encoded_vector,
- size=decoder_size,
- bias_attr=False,
- name="encoded_proj_fc")
- backward_first = layers.sequence_pool(
- input=encoder_input, pool_type='first')
- decoder_boot = layers.fc(input=backward_first,
- size=decoder_size,
- bias_attr=False,
- act="relu",
- name='decoder_boot')
-
- if mode == "train":
- label_in = labels['label_in']
- label_out = labels['label_out']
- label_in = layers.cast(x=label_in, dtype='int64')
- trg_embedding = layers.embedding(
- input=label_in,
- size=[char_num, word_vector_dim],
- dtype='float32')
- predict = self.gru_decoder_with_attention(
- trg_embedding, encoded_vector, encoded_proj, decoder_boot,
- decoder_size, char_num)
- _, decoded_out = layers.topk(input=predict, k=1)
- decoded_out = layers.lod_reset(decoded_out, y=label_out)
- predicts = {'predict':predict, 'decoded_out':decoded_out}
- else:
- ids, predict = self.gru_attention_infer(
- decoder_boot, self.max_length, char_num, word_vector_dim,
- encoded_vector, encoded_proj, decoder_size)
- predicts = {'predict':predict, 'decoded_out':ids}
- return predicts
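
# A numpy sketch of simple_attention from the deleted head: Bahdanau-style
# additive attention over the encoder time steps for one sample. The sum below
# is a stand-in for the learned 1-unit fc ("attention_weights_fc").
import numpy as np

def additive_attention(enc_vec, enc_proj, dec_state_proj):
    # enc_vec: [T, C], enc_proj: [T, D], dec_state_proj: [D]
    scores = np.tanh(enc_proj + dec_state_proj).sum(axis=1)  # [T]
    weights = np.exp(scores - scores.max())
    weights /= weights.sum()                 # softmax over time steps
    return weights @ enc_vec                 # context vector, [C]

ctx = additive_attention(
    np.random.rand(25, 96), np.random.rand(25, 48), np.random.rand(48))
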
diff --git a/ppocr/modeling/heads/rec_ctc_head.py b/ppocr/modeling/heads/rec_ctc_head.py
index 6b8635e4647f186390179b880e132641342df0d6..8c7b904fed741d947e580611de3e9d8cb2f312f4 100755
--- a/ppocr/modeling/heads/rec_ctc_head.py
+++ b/ppocr/modeling/heads/rec_ctc_head.py
@@ -1,16 +1,16 @@
-#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
from __future__ import absolute_import
from __future__ import division
@@ -19,34 +19,33 @@ from __future__ import print_function
import math
import paddle
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
-from .rec_seq_encoder import SequenceEncoder
-from ..common_functions import get_para_bias_attr
-import numpy as np
-
-
-class CTCPredict(object):
- def __init__(self, params):
- super(CTCPredict, self).__init__()
- self.char_num = params['char_num']
- self.encoder = SequenceEncoder(params)
- self.encoder_type = params['encoder_type']
- self.fc_decay = params.get("fc_decay", 0.0004)
-
- def __call__(self, inputs, labels=None, mode=None):
- encoder_features = self.encoder(inputs)
- if self.encoder_type != "reshape":
- encoder_features = fluid.layers.concat(encoder_features, axis=1)
- name = "ctc_fc"
- para_attr, bias_attr = get_para_bias_attr(
- l2_decay=self.fc_decay, k=encoder_features.shape[1], name=name)
- predict = fluid.layers.fc(input=encoder_features,
- size=self.char_num + 1,
- param_attr=para_attr,
- bias_attr=bias_attr,
- name=name)
- decoded_out = fluid.layers.ctc_greedy_decoder(
- input=predict, blank=self.char_num)
- predicts = {'predict': predict, 'decoded_out': decoded_out}
+from paddle import ParamAttr, nn
+
+
+def get_para_bias_attr(l2_decay, k, name):
+ regularizer = paddle.fluid.regularizer.L2Decay(l2_decay)
+ stdv = 1.0 / math.sqrt(k * 1.0)
+ initializer = nn.initializer.Uniform(-stdv, stdv)
+ weight_attr = ParamAttr(
+ regularizer=regularizer, initializer=initializer, name=name + "_w_attr")
+ bias_attr = ParamAttr(
+ regularizer=regularizer, initializer=initializer, name=name + "_b_attr")
+ return [weight_attr, bias_attr]
+
+
+class CTC(nn.Layer):
+ def __init__(self, in_channels, out_channels, fc_decay=1e-5, **kwargs):
+ super(CTC, self).__init__()
+ weight_attr, bias_attr = get_para_bias_attr(
+ l2_decay=fc_decay, k=in_channels, name='ctc_fc')
+ self.fc = nn.Linear(
+ in_channels,
+ out_channels,
+ weight_attr=weight_attr,
+ bias_attr=bias_attr,
+ name='ctc_fc')
+ self.out_channels = out_channels
+
+ def forward(self, x, labels=None):
+ predicts = self.fc(x)
return predicts
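
# A minimal usage sketch of the migrated CTC head above, assuming encoder
# features shaped [batch, time, channels]; the class count is illustrative.
import paddle

ctc = CTC(in_channels=96, out_channels=6625)   # e.g. 6624 characters + 1 blank
feats = paddle.rand([8, 25, 96])               # B=8 crops, T=25 time steps
logits = ctc(feats)                            # [8, 25, 6625], consumed by CTCLoss
# get_para_bias_attr draws weight and bias from U(-s, s) with s = 1/sqrt(k)
# (k = in_channels here) and applies L2 decay to both.
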
diff --git a/ppocr/modeling/heads/rec_seq_encoder.py b/ppocr/modeling/heads/rec_seq_encoder.py
deleted file mode 100755
index 0c49667ae7f76ec2ca6981672c760a61bb9fc2d6..0000000000000000000000000000000000000000
--- a/ppocr/modeling/heads/rec_seq_encoder.py
+++ /dev/null
@@ -1,100 +0,0 @@
-#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-import paddle.fluid as fluid
-import paddle.fluid.layers as layers
-
-
-class EncoderWithReshape(object):
- def __init__(self, params):
- super(EncoderWithReshape, self).__init__()
-
- def __call__(self, inputs):
- sliced_feature = layers.im2sequence(
- input=inputs,
- stride=[1, 1],
- filter_size=[inputs.shape[2], 1],
- name="sliced_feature")
- return sliced_feature
-
-
-class EncoderWithRNN(object):
- def __init__(self, params):
- super(EncoderWithRNN, self).__init__()
- self.rnn_hidden_size = params['SeqRNN']['hidden_size']
-
- def __call__(self, inputs):
- lstm_list = []
- name_prefix = "lstm"
- rnn_hidden_size = self.rnn_hidden_size
- for no in range(1, 3):
- if no == 1:
- is_reverse = False
- else:
- is_reverse = True
- name = "%s_st1_fc%d" % (name_prefix, no)
- fc = layers.fc(input=inputs,
- size=rnn_hidden_size * 4,
- param_attr=fluid.ParamAttr(name=name + "_w"),
- bias_attr=fluid.ParamAttr(name=name + "_b"),
- name=name)
- name = "%s_st1_out%d" % (name_prefix, no)
- lstm, _ = layers.dynamic_lstm(
- input=fc,
- size=rnn_hidden_size * 4,
- is_reverse=is_reverse,
- param_attr=fluid.ParamAttr(name=name + "_w"),
- bias_attr=fluid.ParamAttr(name=name + "_b"),
- use_peepholes=False)
- name = "%s_st2_fc%d" % (name_prefix, no)
- fc = layers.fc(input=lstm,
- size=rnn_hidden_size * 4,
- param_attr=fluid.ParamAttr(name=name + "_w"),
- bias_attr=fluid.ParamAttr(name=name + "_b"),
- name=name)
- name = "%s_st2_out%d" % (name_prefix, no)
- lstm, _ = layers.dynamic_lstm(
- input=fc,
- size=rnn_hidden_size * 4,
- is_reverse=is_reverse,
- param_attr=fluid.ParamAttr(name=name + "_w"),
- bias_attr=fluid.ParamAttr(name=name + "_b"),
- use_peepholes=False)
- lstm_list.append(lstm)
- return lstm_list
-
-
-class SequenceEncoder(object):
- def __init__(self, params):
- super(SequenceEncoder, self).__init__()
- self.encoder_type = params['encoder_type']
- self.encoder_reshape = EncoderWithReshape(params)
- if self.encoder_type == "rnn":
- self.encoder_rnn = EncoderWithRNN(params)
-
- def __call__(self, inputs):
- if self.encoder_type == "reshape":
- encoder_features = self.encoder_reshape(inputs)
- elif self.encoder_type == "rnn":
- inputs = self.encoder_reshape(inputs)
- encoder_features = self.encoder_rnn(inputs)
- else:
-            assert False, "Unsupported encoder_type:%s"\
- % self.encoder_type
- return encoder_features
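
# A numpy sketch of what the deleted EncoderWithReshape's im2sequence call did:
# a [C, H, W] map becomes W time steps of C*H features (one full-height,
# width-1 column per step), up to the exact flattening order.
import numpy as np

def to_sequence(feat):                          # feat: [C, H, W]
    C, H, W = feat.shape
    return feat.transpose(2, 0, 1).reshape(W, C * H)

seq = to_sequence(np.random.rand(96, 8, 25))    # 25 steps of 768 features each
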
diff --git a/ppocr/modeling/heads/rec_srn_all_head.py b/ppocr/modeling/heads/rec_srn_all_head.py
deleted file mode 100755
index e1bb955d437faca243e3768e24b47f4328218624..0000000000000000000000000000000000000000
--- a/ppocr/modeling/heads/rec_srn_all_head.py
+++ /dev/null
@@ -1,230 +0,0 @@
-#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-
-import paddle
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
-import numpy as np
-from .self_attention.model import wrap_encoder
-from .self_attention.model import wrap_encoder_forFeature
-gradient_clip = 10
-
-
-class SRNPredict(object):
- def __init__(self, params):
- super(SRNPredict, self).__init__()
- self.char_num = params['char_num']
- self.max_length = params['max_text_length']
-
- self.num_heads = params['num_heads']
- self.num_encoder_TUs = params['num_encoder_TUs']
- self.num_decoder_TUs = params['num_decoder_TUs']
- self.hidden_dims = params['hidden_dims']
-
- def pvam(self, inputs, others):
-
- b, c, h, w = inputs.shape
- conv_features = fluid.layers.reshape(x=inputs, shape=[-1, c, h * w])
- conv_features = fluid.layers.transpose(x=conv_features, perm=[0, 2, 1])
-
- #===== Transformer encoder =====
- b, t, c = conv_features.shape
- encoder_word_pos = others["encoder_word_pos"]
- gsrm_word_pos = others["gsrm_word_pos"]
-
- enc_inputs = [conv_features, encoder_word_pos, None]
- word_features = wrap_encoder_forFeature(
- src_vocab_size=-1,
- max_length=t,
- n_layer=self.num_encoder_TUs,
- n_head=self.num_heads,
- d_key=int(self.hidden_dims / self.num_heads),
- d_value=int(self.hidden_dims / self.num_heads),
- d_model=self.hidden_dims,
- d_inner_hid=self.hidden_dims,
- prepostprocess_dropout=0.1,
- attention_dropout=0.1,
- relu_dropout=0.1,
- preprocess_cmd="n",
- postprocess_cmd="da",
- weight_sharing=True,
- enc_inputs=enc_inputs, )
- fluid.clip.set_gradient_clip(
- fluid.clip.GradientClipByValue(gradient_clip))
-
- #===== Parallel Visual Attention Module =====
- b, t, c = word_features.shape
-
- word_features = fluid.layers.fc(word_features, c, num_flatten_dims=2)
- word_features_ = fluid.layers.reshape(word_features, [-1, 1, t, c])
- word_features_ = fluid.layers.expand(word_features_,
- [1, self.max_length, 1, 1])
- word_pos_feature = fluid.layers.embedding(gsrm_word_pos,
- [self.max_length, c])
- word_pos_ = fluid.layers.reshape(word_pos_feature,
- [-1, self.max_length, 1, c])
- word_pos_ = fluid.layers.expand(word_pos_, [1, 1, t, 1])
- temp = fluid.layers.elementwise_add(
- word_features_, word_pos_, act='tanh')
-
- attention_weight = fluid.layers.fc(input=temp,
- size=1,
- num_flatten_dims=3,
- bias_attr=False)
- attention_weight = fluid.layers.reshape(
- x=attention_weight, shape=[-1, self.max_length, t])
- attention_weight = fluid.layers.softmax(input=attention_weight, axis=-1)
-
- pvam_features = fluid.layers.matmul(attention_weight,
- word_features) #[b, max_length, c]
-
- return pvam_features
-
- def gsrm(self, pvam_features, others):
-
- #===== GSRM Visual-to-semantic embedding block =====
- b, t, c = pvam_features.shape
- word_out = fluid.layers.fc(
- input=fluid.layers.reshape(pvam_features, [-1, c]),
- size=self.char_num,
- act="softmax")
- #word_out.stop_gradient = True
- word_ids = fluid.layers.argmax(word_out, axis=1)
- word_ids.stop_gradient = True
- word_ids = fluid.layers.reshape(x=word_ids, shape=[-1, t, 1])
-
- #===== GSRM Semantic reasoning block =====
- """
-        This module is implemented with a pair of transformers:
-        gsrm_feature1 is the forward one, gsrm_feature2 is the backward one.
- """
- pad_idx = self.char_num
- gsrm_word_pos = others["gsrm_word_pos"]
- gsrm_slf_attn_bias1 = others["gsrm_slf_attn_bias1"]
- gsrm_slf_attn_bias2 = others["gsrm_slf_attn_bias2"]
-
- def prepare_bi(word_ids):
- """
- prepare bi for gsrm
- word1 for forward; word2 for backward
- """
- word1 = fluid.layers.cast(word_ids, "float32")
- word1 = fluid.layers.pad(word1, [0, 0, 1, 0, 0, 0],
- pad_value=1.0 * pad_idx)
- word1 = fluid.layers.cast(word1, "int64")
- word1 = word1[:, :-1, :]
- word2 = word_ids
- return word1, word2
-
- word1, word2 = prepare_bi(word_ids)
- word1.stop_gradient = True
- word2.stop_gradient = True
- enc_inputs_1 = [word1, gsrm_word_pos, gsrm_slf_attn_bias1]
- enc_inputs_2 = [word2, gsrm_word_pos, gsrm_slf_attn_bias2]
-
- gsrm_feature1 = wrap_encoder(
- src_vocab_size=self.char_num + 1,
- max_length=self.max_length,
- n_layer=self.num_decoder_TUs,
- n_head=self.num_heads,
- d_key=int(self.hidden_dims / self.num_heads),
- d_value=int(self.hidden_dims / self.num_heads),
- d_model=self.hidden_dims,
- d_inner_hid=self.hidden_dims,
- prepostprocess_dropout=0.1,
- attention_dropout=0.1,
- relu_dropout=0.1,
- preprocess_cmd="n",
- postprocess_cmd="da",
- weight_sharing=True,
- enc_inputs=enc_inputs_1, )
- gsrm_feature2 = wrap_encoder(
- src_vocab_size=self.char_num + 1,
- max_length=self.max_length,
- n_layer=self.num_decoder_TUs,
- n_head=self.num_heads,
- d_key=int(self.hidden_dims / self.num_heads),
- d_value=int(self.hidden_dims / self.num_heads),
- d_model=self.hidden_dims,
- d_inner_hid=self.hidden_dims,
- prepostprocess_dropout=0.1,
- attention_dropout=0.1,
- relu_dropout=0.1,
- preprocess_cmd="n",
- postprocess_cmd="da",
- weight_sharing=True,
- enc_inputs=enc_inputs_2, )
- gsrm_feature2 = fluid.layers.pad(gsrm_feature2, [0, 0, 0, 1, 0, 0],
- pad_value=0.)
- gsrm_feature2 = gsrm_feature2[:, 1:, ]
- gsrm_features = gsrm_feature1 + gsrm_feature2
-
- b, t, c = gsrm_features.shape
-
- gsrm_out = fluid.layers.matmul(
- x=gsrm_features,
- y=fluid.default_main_program().global_block().var(
- "src_word_emb_table"),
- transpose_y=True)
- b, t, c = gsrm_out.shape
- gsrm_out = fluid.layers.softmax(input=fluid.layers.reshape(gsrm_out,
- [-1, c]))
-
- return gsrm_features, word_out, gsrm_out
-
- def vsfd(self, pvam_features, gsrm_features):
-
- #===== Visual-Semantic Fusion Decoder Module =====
- b, t, c1 = pvam_features.shape
- b, t, c2 = gsrm_features.shape
- combine_features_ = fluid.layers.concat(
- [pvam_features, gsrm_features], axis=2)
- img_comb_features_ = fluid.layers.reshape(
- x=combine_features_, shape=[-1, c1 + c2])
- img_comb_features_map = fluid.layers.fc(input=img_comb_features_,
- size=c1,
- act="sigmoid")
- img_comb_features_map = fluid.layers.reshape(
- x=img_comb_features_map, shape=[-1, t, c1])
- combine_features = img_comb_features_map * pvam_features + (
- 1.0 - img_comb_features_map) * gsrm_features
- img_comb_features = fluid.layers.reshape(
- x=combine_features, shape=[-1, c1])
-
- fc_out = fluid.layers.fc(input=img_comb_features,
- size=self.char_num,
- act="softmax")
- return fc_out
-
- def __call__(self, inputs, others, mode=None):
-
- pvam_features = self.pvam(inputs, others)
- gsrm_features, word_out, gsrm_out = self.gsrm(pvam_features, others)
- final_out = self.vsfd(pvam_features, gsrm_features)
-
- _, decoded_out = fluid.layers.topk(input=final_out, k=1)
- predicts = {
- 'predict': final_out,
- 'decoded_out': decoded_out,
- 'word_out': word_out,
- 'gsrm_out': gsrm_out
- }
-
- return predicts
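
# A numpy sketch of the deleted head's vsfd fusion: a learned sigmoid gate
# mixes the visual (pvam) and semantic (gsrm) features element-wise.
import numpy as np

def vsfd_fuse(pvam, gsrm, gate_logits):
    g = 1.0 / (1.0 + np.exp(-gate_logits))      # the sigmoid fc output above
    return g * pvam + (1.0 - g) * gsrm          # convex combination per element

fused = vsfd_fuse(np.ones((25, 512)), np.zeros((25, 512)), np.zeros((25, 512)))
# gate_logits == 0 gives g == 0.5, so fused is the midpoint: all 0.5 here.
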
diff --git a/ppocr/modeling/heads/self_attention/__init__.py b/ppocr/modeling/heads/self_attention/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/ppocr/modeling/heads/self_attention/model.py b/ppocr/modeling/heads/self_attention/model.py
deleted file mode 100644
index 8bf34e4ac6a2c3c33d2a46b1f4f9dbfaf8db8f57..0000000000000000000000000000000000000000
--- a/ppocr/modeling/heads/self_attention/model.py
+++ /dev/null
@@ -1,485 +0,0 @@
-from functools import partial
-import numpy as np
-
-import paddle.fluid as fluid
-import paddle.fluid.layers as layers
-
-encoder_data_input_fields = (
- "src_word",
- "src_pos",
- "src_slf_attn_bias", )
-
-
-def wrap_layer_with_block(layer, block_idx):
- """
-    Make a layer definition accept a target block index, so that layers can
-    be added to other blocks from within the current block. This makes it
-    easy to define caches shared across while-loop iterations.
- """
-
- class BlockGuard(object):
- """
- BlockGuard class.
-
- BlockGuard class is used to switch to the given block in a program by
- using the Python `with` keyword.
- """
-
- def __init__(self, block_idx=None, main_program=None):
- self.main_program = fluid.default_main_program(
- ) if main_program is None else main_program
- self.old_block_idx = self.main_program.current_block().idx
- self.new_block_idx = block_idx
-
- def __enter__(self):
- self.main_program.current_block_idx = self.new_block_idx
-
- def __exit__(self, exc_type, exc_val, exc_tb):
- self.main_program.current_block_idx = self.old_block_idx
- if exc_type is not None:
- return False # re-raise exception
- return True
-
- def layer_wrapper(*args, **kwargs):
- with BlockGuard(block_idx):
- return layer(*args, **kwargs)
-
- return layer_wrapper
-
-
-def multi_head_attention(queries,
- keys,
- values,
- attn_bias,
- d_key,
- d_value,
- d_model,
- n_head=1,
- dropout_rate=0.,
- cache=None,
- gather_idx=None,
- static_kv=False):
- """
- Multi-Head Attention. Note that attn_bias is added to the logit before
-    computing the softmax activation to mask certain selected positions so
-    that they will not be considered in the attention weights.
- """
- keys = queries if keys is None else keys
- values = keys if values is None else values
-
- if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
- raise ValueError(
- "Inputs: quries, keys and values should all be 3-D tensors.")
-
- def __compute_qkv(queries, keys, values, n_head, d_key, d_value):
- """
- Add linear projection to queries, keys, and values.
- """
- q = layers.fc(input=queries,
- size=d_key * n_head,
- bias_attr=False,
- num_flatten_dims=2)
- # For encoder-decoder attention in inference, insert the ops and vars
- # into global block to use as cache among beam search.
- fc_layer = wrap_layer_with_block(
- layers.fc, fluid.default_main_program().current_block()
- .parent_idx) if cache is not None and static_kv else layers.fc
- k = fc_layer(
- input=keys,
- size=d_key * n_head,
- bias_attr=False,
- num_flatten_dims=2)
- v = fc_layer(
- input=values,
- size=d_value * n_head,
- bias_attr=False,
- num_flatten_dims=2)
- return q, k, v
-
- def __split_heads_qkv(queries, keys, values, n_head, d_key, d_value):
- """
- Reshape input tensors at the last dimension to split multi-heads
- and then transpose. Specifically, transform the input tensor with shape
- [bs, max_sequence_length, n_head * hidden_dim] to the output tensor
- with shape [bs, n_head, max_sequence_length, hidden_dim].
- """
- # The value 0 in shape attr means copying the corresponding dimension
- # size of the input as the output dimension size.
- reshaped_q = layers.reshape(
- x=queries, shape=[0, 0, n_head, d_key], inplace=True)
-        # permute the dimensions into:
- # [batch_size, n_head, max_sequence_len, hidden_size_per_head]
- q = layers.transpose(x=reshaped_q, perm=[0, 2, 1, 3])
- # For encoder-decoder attention in inference, insert the ops and vars
- # into global block to use as cache among beam search.
- reshape_layer = wrap_layer_with_block(
- layers.reshape,
- fluid.default_main_program().current_block()
- .parent_idx) if cache is not None and static_kv else layers.reshape
- transpose_layer = wrap_layer_with_block(
- layers.transpose,
- fluid.default_main_program().current_block().
- parent_idx) if cache is not None and static_kv else layers.transpose
- reshaped_k = reshape_layer(
- x=keys, shape=[0, 0, n_head, d_key], inplace=True)
- k = transpose_layer(x=reshaped_k, perm=[0, 2, 1, 3])
- reshaped_v = reshape_layer(
- x=values, shape=[0, 0, n_head, d_value], inplace=True)
- v = transpose_layer(x=reshaped_v, perm=[0, 2, 1, 3])
-
- if cache is not None: # only for faster inference
- if static_kv: # For encoder-decoder attention in inference
- cache_k, cache_v = cache["static_k"], cache["static_v"]
- # To init the static_k and static_v in cache.
- # Maybe we can use condition_op(if_else) to do these at the first
- # step in while loop to replace these, however it might be less
- # efficient.
- static_cache_init = wrap_layer_with_block(
- layers.assign,
- fluid.default_main_program().current_block().parent_idx)
- static_cache_init(k, cache_k)
- static_cache_init(v, cache_v)
- else: # For decoder self-attention in inference
- cache_k, cache_v = cache["k"], cache["v"]
- # gather cell states corresponding to selected parent
- select_k = layers.gather(cache_k, index=gather_idx)
- select_v = layers.gather(cache_v, index=gather_idx)
- if not static_kv:
- # For self attention in inference, use cache and concat time steps.
- select_k = layers.concat([select_k, k], axis=2)
- select_v = layers.concat([select_v, v], axis=2)
- # update cell states(caches) cached in global block
- layers.assign(select_k, cache_k)
- layers.assign(select_v, cache_v)
- return q, select_k, select_v
- return q, k, v
-
- def __combine_heads(x):
- """
-        Transpose and then reshape the last two dimensions of the input tensor
-        x so that they become one dimension; this is the reverse of __split_heads.
- """
- if len(x.shape) != 4:
- raise ValueError("Input(x) should be a 4-D Tensor.")
-
- trans_x = layers.transpose(x, perm=[0, 2, 1, 3])
- # The value 0 in shape attr means copying the corresponding dimension
- # size of the input as the output dimension size.
- return layers.reshape(
- x=trans_x,
- shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]],
- inplace=True)
-
- def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate):
- """
- Scaled Dot-Product Attention
- """
- # print(q)
- # print(k)
-
- product = layers.matmul(x=q, y=k, transpose_y=True, alpha=d_key**-0.5)
- if attn_bias:
- product += attn_bias
- weights = layers.softmax(product)
- if dropout_rate:
- weights = layers.dropout(
- weights, dropout_prob=dropout_rate, seed=None, is_test=False)
- out = layers.matmul(weights, v)
- return out
-
- q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value)
- q, k, v = __split_heads_qkv(q, k, v, n_head, d_key, d_value)
-
- ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_model,
- dropout_rate)
-
- out = __combine_heads(ctx_multiheads)
-
- # Project back to the model size.
- proj_out = layers.fc(input=out,
- size=d_model,
- bias_attr=False,
- num_flatten_dims=2)
- return proj_out
-
-
-def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate):
- """
- Position-wise Feed-Forward Networks.
- This module consists of two linear transformations with a ReLU activation
- in between, which is applied to each position separately and identically.
- """
- hidden = layers.fc(input=x,
- size=d_inner_hid,
- num_flatten_dims=2,
- act="relu")
- if dropout_rate:
- hidden = layers.dropout(
- hidden, dropout_prob=dropout_rate, seed=None, is_test=False)
- out = layers.fc(input=hidden, size=d_hid, num_flatten_dims=2)
- return out
-
-
-def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.):
- """
-    Add residual connection, layer normalization and dropout to the out tensor
- optionally according to the value of process_cmd.
- This will be used before or after multi-head attention and position-wise
- feed-forward networks.
- """
- for cmd in process_cmd:
- if cmd == "a": # add residual connection
- out = out + prev_out if prev_out else out
- elif cmd == "n": # add layer normalization
- out = layers.layer_norm(
- out,
- begin_norm_axis=len(out.shape) - 1,
- param_attr=fluid.initializer.Constant(1.),
- bias_attr=fluid.initializer.Constant(0.))
- elif cmd == "d": # add dropout
- if dropout_rate:
- out = layers.dropout(
- out, dropout_prob=dropout_rate, seed=None, is_test=False)
- return out
-
-
-pre_process_layer = partial(pre_post_process_layer, None)
-post_process_layer = pre_post_process_layer
-
-
-def prepare_encoder(
- src_word, # [b,t,c]
- src_pos,
- src_vocab_size,
- src_emb_dim,
- src_max_len,
- dropout_rate=0.,
- bos_idx=0,
- word_emb_param_name=None,
- pos_enc_param_name=None):
- """Add word embeddings and position encodings.
- The output tensor has a shape of:
- [batch_size, max_src_length_in_batch, d_model].
- This module is used at the bottom of the encoder stacks.
- """
-
- src_word_emb = src_word
- src_word_emb = layers.cast(src_word_emb, 'float32')
-
- src_word_emb = layers.scale(x=src_word_emb, scale=src_emb_dim**0.5)
- src_pos_enc = layers.embedding(
- src_pos,
- size=[src_max_len, src_emb_dim],
- param_attr=fluid.ParamAttr(
- name=pos_enc_param_name, trainable=False))
- src_pos_enc.stop_gradient = True
- enc_input = src_word_emb + src_pos_enc
- return layers.dropout(
- enc_input, dropout_prob=dropout_rate, seed=None,
- is_test=False) if dropout_rate else enc_input
-
-
-def prepare_decoder(src_word,
- src_pos,
- src_vocab_size,
- src_emb_dim,
- src_max_len,
- dropout_rate=0.,
- bos_idx=0,
- word_emb_param_name=None,
- pos_enc_param_name=None):
- """Add word embeddings and position encodings.
- The output tensor has a shape of:
- [batch_size, max_src_length_in_batch, d_model].
- This module is used at the bottom of the encoder stacks.
- """
- src_word_emb = layers.embedding(
- src_word,
- size=[src_vocab_size, src_emb_dim],
- padding_idx=bos_idx, # set embedding of bos to 0
- param_attr=fluid.ParamAttr(
- name=word_emb_param_name,
- initializer=fluid.initializer.Normal(0., src_emb_dim**-0.5)))
-
- src_word_emb = layers.scale(x=src_word_emb, scale=src_emb_dim**0.5)
- src_pos_enc = layers.embedding(
- src_pos,
- size=[src_max_len, src_emb_dim],
- param_attr=fluid.ParamAttr(
- name=pos_enc_param_name, trainable=False))
- src_pos_enc.stop_gradient = True
- enc_input = src_word_emb + src_pos_enc
- return layers.dropout(
- enc_input, dropout_prob=dropout_rate, seed=None,
- is_test=False) if dropout_rate else enc_input
-
-
-def encoder_layer(enc_input,
- attn_bias,
- n_head,
- d_key,
- d_value,
- d_model,
- d_inner_hid,
- prepostprocess_dropout,
- attention_dropout,
- relu_dropout,
- preprocess_cmd="n",
- postprocess_cmd="da"):
- """The encoder layers that can be stacked to form a deep encoder.
-    This module consists of a multi-head (self) attention followed by
-    position-wise feed-forward networks, with both components wrapped by
-    post_process_layer to add residual connection, layer normalization
-    and dropout.
- """
- attn_output = multi_head_attention(
- pre_process_layer(enc_input, preprocess_cmd,
- prepostprocess_dropout), None, None, attn_bias, d_key,
- d_value, d_model, n_head, attention_dropout)
- attn_output = post_process_layer(enc_input, attn_output, postprocess_cmd,
- prepostprocess_dropout)
- ffd_output = positionwise_feed_forward(
- pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout),
- d_inner_hid, d_model, relu_dropout)
- return post_process_layer(attn_output, ffd_output, postprocess_cmd,
- prepostprocess_dropout)
-
-
-def encoder(enc_input,
- attn_bias,
- n_layer,
- n_head,
- d_key,
- d_value,
- d_model,
- d_inner_hid,
- prepostprocess_dropout,
- attention_dropout,
- relu_dropout,
- preprocess_cmd="n",
- postprocess_cmd="da"):
- """
- The encoder is composed of a stack of identical layers returned by calling
- encoder_layer.
- """
- for i in range(n_layer):
- enc_output = encoder_layer(
- enc_input,
- attn_bias,
- n_head,
- d_key,
- d_value,
- d_model,
- d_inner_hid,
- prepostprocess_dropout,
- attention_dropout,
- relu_dropout,
- preprocess_cmd,
- postprocess_cmd, )
- enc_input = enc_output
- enc_output = pre_process_layer(enc_output, preprocess_cmd,
- prepostprocess_dropout)
- return enc_output
-
-
-def wrap_encoder_forFeature(src_vocab_size,
- max_length,
- n_layer,
- n_head,
- d_key,
- d_value,
- d_model,
- d_inner_hid,
- prepostprocess_dropout,
- attention_dropout,
- relu_dropout,
- preprocess_cmd,
- postprocess_cmd,
- weight_sharing,
- enc_inputs=None,
- bos_idx=0):
- """
- The wrapper assembles together all needed layers for the encoder.
- img, src_pos, src_slf_attn_bias = enc_inputs
- img
- """
-
- conv_features, src_pos, src_slf_attn_bias = enc_inputs #
- b, t, c = conv_features.shape
-
- enc_input = prepare_encoder(
- conv_features,
- src_pos,
- src_vocab_size,
- d_model,
- max_length,
- prepostprocess_dropout,
- bos_idx=bos_idx,
- word_emb_param_name="src_word_emb_table")
-
- enc_output = encoder(
- enc_input,
- src_slf_attn_bias,
- n_layer,
- n_head,
- d_key,
- d_value,
- d_model,
- d_inner_hid,
- prepostprocess_dropout,
- attention_dropout,
- relu_dropout,
- preprocess_cmd,
- postprocess_cmd, )
- return enc_output
-
-
-def wrap_encoder(src_vocab_size,
- max_length,
- n_layer,
- n_head,
- d_key,
- d_value,
- d_model,
- d_inner_hid,
- prepostprocess_dropout,
- attention_dropout,
- relu_dropout,
- preprocess_cmd,
- postprocess_cmd,
- weight_sharing,
- enc_inputs=None,
- bos_idx=0):
- """
- The wrapper assembles together all needed layers for the encoder.
-    src_word, src_pos, src_slf_attn_bias = enc_inputs
- """
-
- src_word, src_pos, src_slf_attn_bias = enc_inputs #
-
- enc_input = prepare_decoder(
- src_word,
- src_pos,
- src_vocab_size,
- d_model,
- max_length,
- prepostprocess_dropout,
- bos_idx=bos_idx,
- word_emb_param_name="src_word_emb_table")
-
- enc_output = encoder(
- enc_input,
- src_slf_attn_bias,
- n_layer,
- n_head,
- d_key,
- d_value,
- d_model,
- d_inner_hid,
- prepostprocess_dropout,
- attention_dropout,
- relu_dropout,
- preprocess_cmd,
- postprocess_cmd, )
- return enc_output
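
# A numpy sketch of the scaled dot-product attention at the core of the deleted
# encoder: softmax(q @ k^T / sqrt(d_key)) @ v for a single head.
import numpy as np

def sdp_attention(q, k, v):                  # q, k: [T, d_key], v: [T, d_value]
    logits = q @ k.T / np.sqrt(q.shape[-1])  # matches matmul(..., alpha=d_key**-0.5)
    w = np.exp(logits - logits.max(axis=-1, keepdims=True))
    w /= w.sum(axis=-1, keepdims=True)       # softmax over key positions
    return w @ v                             # [T, d_value]

out = sdp_attention(
    np.random.rand(10, 64), np.random.rand(10, 64), np.random.rand(10, 64))
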
diff --git a/ppocr/modeling/losses/__init__.py b/ppocr/modeling/losses/__init__.py
index abf198b97e6e818e1fbe59006f98492640bcee54..1c258bc19d0c5a759887895aea83e18034b8fb47 100755
--- a/ppocr/modeling/losses/__init__.py
+++ b/ppocr/modeling/losses/__init__.py
@@ -11,3 +11,22 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+
+import copy
+
+
+def build_loss(config):
+ # det loss
+ from .det_db_loss import DBLoss
+
+ # rec loss
+ from .rec_ctc_loss import CTCLoss
+
+ support_dict = ['DBLoss', 'CTCLoss']
+
+ config = copy.deepcopy(config)
+ module_name = config.pop('name')
+    assert module_name in support_dict, Exception('loss only supports {}'.format(
+ support_dict))
+ module_class = eval(module_name)(**config)
+ return module_class
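
# A minimal usage sketch of build_loss; the dict mirrors a YAML Loss section
# (the DBLoss keyword names here are assumptions, not taken from this hunk).
loss_config = {
    'name': 'DBLoss',
    'balance_loss': True,
    'main_loss_type': 'DiceLoss',
    'alpha': 5,
    'beta': 10,
    'ohem_ratio': 3,
}
loss_fn = build_loss(loss_config)   # pops 'name', passes the rest as kwargs
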
diff --git a/ppocr/modeling/losses/det_basic_loss.py b/ppocr/modeling/losses/det_basic_loss.py
index 8fb81070c449bcbb98728b781fc48d5ce83aa019..ef656e8c77de5d1bfa66d55ea2d7f68a9f7217ec 100644
--- a/ppocr/modeling/losses/det_basic_loss.py
+++ b/ppocr/modeling/losses/det_basic_loss.py
@@ -1,16 +1,16 @@
-#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
from __future__ import absolute_import
from __future__ import division
@@ -18,99 +18,189 @@ from __future__ import print_function
import numpy as np
-import paddle.fluid as fluid
-
-
-def BalanceLoss(pred,
- gt,
- mask,
- balance_loss=True,
- main_loss_type="DiceLoss",
- negative_ratio=3,
- return_origin=False,
- eps=1e-6):
- """
- The BalanceLoss for Differentiable Binarization text detection
- args:
- pred (variable): predicted feature maps.
- gt (variable): ground truth feature maps.
- mask (variable): masked maps.
- balance_loss (bool): whether balance loss or not, default is True
- main_loss_type (str): can only be one of ['CrossEntropy','DiceLoss',
- 'Euclidean','BCELoss', 'MaskL1Loss'], default is 'DiceLoss'.
- negative_ratio (int|float): float, default is 3.
- return_origin (bool): whether return unbalanced loss or not, default is False.
- eps (float): default is 1e-6.
- return: (variable) balanced loss
- """
- positive = gt * mask
- negative = (1 - gt) * mask
-
- positive_count = fluid.layers.reduce_sum(positive)
- positive_count_int = fluid.layers.cast(positive_count, dtype=np.int32)
- negative_count = min(
- fluid.layers.reduce_sum(negative), positive_count * negative_ratio)
- negative_count_int = fluid.layers.cast(negative_count, dtype=np.int32)
-
- if main_loss_type == "CrossEntropy":
- loss = fluid.layers.cross_entropy(input=pred, label=gt, soft_label=True)
- loss = fluid.layers.reduce_mean(loss)
- elif main_loss_type == "Euclidean":
- loss = fluid.layers.square(pred - gt)
- loss = fluid.layers.reduce_mean(loss)
- elif main_loss_type == "DiceLoss":
- loss = DiceLoss(pred, gt, mask)
- elif main_loss_type == "BCELoss":
- loss = fluid.layers.sigmoid_cross_entropy_with_logits(pred, label=gt)
- elif main_loss_type == "MaskL1Loss":
- loss = MaskL1Loss(pred, gt, mask)
- else:
- loss_type = [
- 'CrossEntropy', 'DiceLoss', 'Euclidean', 'BCELoss', 'MaskL1Loss'
- ]
- raise Exception("main_loss_type in BalanceLoss() can only be one of {}".
- format(loss_type))
-
- if not balance_loss:
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+
+
+class BalanceLoss(nn.Layer):
+ def __init__(self,
+ balance_loss=True,
+ main_loss_type='DiceLoss',
+ negative_ratio=3,
+ return_origin=False,
+ eps=1e-6,
+ **kwargs):
+ """
+ The BalanceLoss for Differentiable Binarization text detection
+ args:
+            balance_loss (bool): whether to balance the loss, default is True
+            main_loss_type (str): can only be one of ['CrossEntropy','DiceLoss',
+                'Euclidean','BCELoss', 'MaskL1Loss'], default is 'DiceLoss'.
+            negative_ratio (int|float): maximum negative/positive sample ratio, default is 3.
+            return_origin (bool): whether to also return the unbalanced loss, default is False.
+ eps (float): default is 1e-6.
+ """
+ super(BalanceLoss, self).__init__()
+        self.balance_loss = balance_loss
+        self.main_loss_type = main_loss_type
+        self.negative_ratio = negative_ratio
+        self.return_origin = return_origin
+        self.eps = eps
+
+ if self.main_loss_type == "CrossEntropy":
+ self.loss = nn.CrossEntropyLoss()
+ elif self.main_loss_type == "Euclidean":
+ self.loss = nn.MSELoss()
+ elif self.main_loss_type == "DiceLoss":
+ self.loss = DiceLoss(self.eps)
+ elif self.main_loss_type == "BCELoss":
+ self.loss = BCELoss(reduction='none')
+ elif self.main_loss_type == "MaskL1Loss":
+ self.loss = MaskL1Loss(self.eps)
+ else:
+ loss_type = [
+ 'CrossEntropy', 'DiceLoss', 'Euclidean', 'BCELoss', 'MaskL1Loss'
+ ]
+ raise Exception(
+ "main_loss_type in BalanceLoss() can only be one of {}".format(
+ loss_type))
+
+ def forward(self, pred, gt, mask=None):
+ """
+ The BalanceLoss for Differentiable Binarization text detection
+ args:
+ pred (variable): predicted feature maps.
+ gt (variable): ground truth feature maps.
+ mask (variable): masked maps.
+ return: (variable) balanced loss
+ """
+ # if self.main_loss_type in ['DiceLoss']:
+ # # For the loss that returns to scalar value, perform ohem on the mask
+ # mask = ohem_batch(pred, gt, mask, self.negative_ratio)
+ # loss = self.loss(pred, gt, mask)
+ # return loss
+
+ positive = gt * mask
+ negative = (1 - gt) * mask
+
+ positive_count = int(positive.sum())
+ negative_count = int(
+ min(negative.sum(), positive_count * self.negative_ratio))
+ loss = self.loss(pred, gt, mask=mask)
+
+ if not self.balance_loss:
+ return loss
+
+ positive_loss = positive * loss
+ negative_loss = negative * loss
+ negative_loss = paddle.reshape(negative_loss, shape=[-1])
+ if negative_count > 0:
+ sort_loss = negative_loss.sort(descending=True)
+ negative_loss = sort_loss[:negative_count]
+ # negative_loss, _ = paddle.topk(negative_loss, k=negative_count_int)
+ balance_loss = (positive_loss.sum() + negative_loss.sum()) / (
+ positive_count + negative_count + self.eps)
+ else:
+ balance_loss = positive_loss.sum() / (positive_count + self.eps)
+ if self.return_origin:
+ return balance_loss, loss
+
+ return balance_loss
+
+
+class DiceLoss(nn.Layer):
+ def __init__(self, eps=1e-6):
+ super(DiceLoss, self).__init__()
+ self.eps = eps
+
+ def forward(self, pred, gt, mask, weights=None):
+ """
+ DiceLoss function.
+ """
+
+ assert pred.shape == gt.shape
+ assert pred.shape == mask.shape
+ if weights is not None:
+ assert weights.shape == mask.shape
+ mask = weights * mask
+ intersection = paddle.sum(pred * gt * mask)
+
+ union = paddle.sum(pred * mask) + paddle.sum(gt * mask) + self.eps
+ loss = 1 - 2.0 * intersection / union
+ assert loss <= 1
return loss
- positive_loss = positive * loss
- negative_loss = negative * loss
- negative_loss = fluid.layers.reshape(negative_loss, shape=[-1])
- negative_loss, _ = fluid.layers.topk(negative_loss, k=negative_count_int)
- balance_loss = (fluid.layers.reduce_sum(positive_loss) +
- fluid.layers.reduce_sum(negative_loss)) / (
- positive_count + negative_count + eps)
-
- if return_origin:
- return balance_loss, loss
- return balance_loss
-
-
-def DiceLoss(pred, gt, mask, weights=None, eps=1e-6):
- """
- DiceLoss function.
- """
-
- assert pred.shape == gt.shape
- assert pred.shape == mask.shape
- if weights is not None:
- assert weights.shape == mask.shape
- mask = weights * mask
- intersection = fluid.layers.reduce_sum(pred * gt * mask)
-
- union = fluid.layers.reduce_sum(pred * mask) + fluid.layers.reduce_sum(
- gt * mask) + eps
- loss = 1 - 2.0 * intersection / union
- assert loss <= 1
- return loss
-
-
-def MaskL1Loss(pred, gt, mask, eps=1e-6):
- """
- Mask L1 Loss
- """
- loss = fluid.layers.reduce_sum((fluid.layers.abs(pred - gt) * mask)) / (
- fluid.layers.reduce_sum(mask) + eps)
- loss = fluid.layers.reduce_mean(loss)
- return loss
+
+class MaskL1Loss(nn.Layer):
+ def __init__(self, eps=1e-6):
+ super(MaskL1Loss, self).__init__()
+ self.eps = eps
+
+ def forward(self, pred, gt, mask):
+ """
+ Mask L1 Loss
+ """
+ loss = (paddle.abs(pred - gt) * mask).sum() / (mask.sum() + self.eps)
+ loss = paddle.mean(loss)
+ return loss
+
+
+class BCELoss(nn.Layer):
+ def __init__(self, reduction='mean'):
+ super(BCELoss, self).__init__()
+ self.reduction = reduction
+
+ def forward(self, input, label, mask=None, weight=None, name=None):
+ loss = F.binary_cross_entropy(input, label, reduction=self.reduction)
+ return loss
+
+
+def ohem_single(score, gt_text, training_mask, ohem_ratio):
+    pos_num = int(np.sum(gt_text > 0.5)) - int(
+        np.sum((gt_text > 0.5) & (training_mask <= 0.5)))
+
+ if pos_num == 0:
+ # selected_mask = gt_text.copy() * 0 # may be not good
+ selected_mask = training_mask
+ selected_mask = selected_mask.reshape(
+ 1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32')
+ return selected_mask
+
+    neg_num = int(np.sum(gt_text <= 0.5))
+    neg_num = int(min(pos_num * ohem_ratio, neg_num))
+
+ if neg_num == 0:
+ selected_mask = training_mask
+ selected_mask = selected_mask.reshape(
+ 1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32')
+ return selected_mask
+
+ neg_score = score[gt_text <= 0.5]
+    # sort the negative-sample scores from high to low
+ neg_score_sorted = np.sort(-neg_score)
+ threshold = -neg_score_sorted[neg_num - 1]
+    # keep the mask of high-scoring negatives together with all positives
+ selected_mask = ((score >= threshold) |
+ (gt_text > 0.5)) & (training_mask > 0.5)
+ selected_mask = selected_mask.reshape(
+ 1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32')
+ return selected_mask
+
+
+def ohem_batch(scores, gt_texts, training_masks, ohem_ratio):
+ scores = scores.numpy()
+ gt_texts = gt_texts.numpy()
+ training_masks = training_masks.numpy()
+
+ selected_masks = []
+ for i in range(scores.shape[0]):
+ selected_masks.append(
+ ohem_single(scores[i, :, :], gt_texts[i, :, :], training_masks[
+ i, :, :], ohem_ratio))
+
+ selected_masks = np.concatenate(selected_masks, 0)
+ selected_masks = paddle.to_variable(selected_masks)
+
+ return selected_masks
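
As a quick sanity check, the rewritten basic losses can be exercised directly
in dygraph mode. A sketch, assuming the paddle 2.0-beta API this patch
targets; shapes are illustrative only:

    import paddle
    from ppocr.modeling.losses.det_basic_loss import DiceLoss, MaskL1Loss

    pred = paddle.rand([1, 640, 640])        # predicted shrink map
    gt = paddle.cast(pred > 0.5, 'float32')  # fake ground truth
    mask = paddle.ones_like(gt)              # no pixels ignored

    print(DiceLoss(eps=1e-6)(pred, gt, mask))    # scalar dice loss in [0, 1]
    print(MaskL1Loss(eps=1e-6)(pred, gt, mask))  # masked mean absolute error
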
diff --git a/ppocr/modeling/losses/det_db_loss.py b/ppocr/modeling/losses/det_db_loss.py
index c35e33ae13b831fc2bbb355683adad6b83f1a7bf..f170f6734a19289305a41d9e3f17a51d5ad10ec7 100755
--- a/ppocr/modeling/losses/det_db_loss.py
+++ b/ppocr/modeling/losses/det_db_loss.py
@@ -1,68 +1,71 @@
-#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+from paddle import nn
+
from .det_basic_loss import BalanceLoss, MaskL1Loss, DiceLoss
-class DBLoss(object):
+class DBLoss(nn.Layer):
"""
Differentiable Binarization (DB) Loss Function
args:
        param (dict): the hyper parameters for DB Loss
"""
- def __init__(self, params):
+ def __init__(self,
+ balance_loss=True,
+ main_loss_type='DiceLoss',
+ alpha=5,
+ beta=10,
+ ohem_ratio=3,
+ eps=1e-6,
+ **kwargs):
super(DBLoss, self).__init__()
- self.balance_loss = params['balance_loss']
- self.main_loss_type = params['main_loss_type']
-
- self.alpha = params['alpha']
- self.beta = params['beta']
- self.ohem_ratio = params['ohem_ratio']
+ self.alpha = alpha
+ self.beta = beta
+ self.dice_loss = DiceLoss(eps=eps)
+ self.l1_loss = MaskL1Loss(eps=eps)
+ self.bce_loss = BalanceLoss(
+ balance_loss=balance_loss,
+ main_loss_type=main_loss_type,
+ negative_ratio=ohem_ratio)
- def __call__(self, predicts, labels):
- label_shrink_map = labels['shrink_map']
- label_shrink_mask = labels['shrink_mask']
- label_threshold_map = labels['threshold_map']
- label_threshold_mask = labels['threshold_mask']
- pred = predicts['maps']
- shrink_maps = pred[:, 0, :, :]
- threshold_maps = pred[:, 1, :, :]
- binary_maps = pred[:, 2, :, :]
+ def forward(self, predicts, labels):
+ label_threshold_map, label_threshold_mask, label_shrink_map, label_shrink_mask = labels[
+ 1:]
+ shrink_maps = predicts[:, 0, :, :]
+ threshold_maps = predicts[:, 1, :, :]
+ binary_maps = predicts[:, 2, :, :]
- loss_shrink_maps = BalanceLoss(
- shrink_maps,
- label_shrink_map,
- label_shrink_mask,
- balance_loss=self.balance_loss,
- main_loss_type=self.main_loss_type,
- negative_ratio=self.ohem_ratio)
- loss_threshold_maps = MaskL1Loss(threshold_maps, label_threshold_map,
- label_threshold_mask)
- loss_binary_maps = DiceLoss(binary_maps, label_shrink_map,
- label_shrink_mask)
+ loss_shrink_maps = self.bce_loss(shrink_maps, label_shrink_map,
+ label_shrink_mask)
+ loss_threshold_maps = self.l1_loss(threshold_maps, label_threshold_map,
+ label_threshold_mask)
+ loss_binary_maps = self.dice_loss(binary_maps, label_shrink_map,
+ label_shrink_mask)
loss_shrink_maps = self.alpha * loss_shrink_maps
loss_threshold_maps = self.beta * loss_threshold_maps
- loss_all = loss_shrink_maps + loss_threshold_maps\
- + loss_binary_maps
- losses = {'total_loss':loss_all,\
- "loss_shrink_maps":loss_shrink_maps,\
- "loss_threshold_maps":loss_threshold_maps,\
- "loss_binary_maps":loss_binary_maps}
+ loss_all = loss_shrink_maps + loss_threshold_maps \
+ + loss_binary_maps
+ losses = {'loss': loss_all, \
+ "loss_shrink_maps": loss_shrink_maps, \
+ "loss_threshold_maps": loss_threshold_maps, \
+ "loss_binary_maps": loss_binary_maps}
return losses
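
Note the changed calling convention: predicts is now the raw (N, 3, H, W) map
tensor and labels is the dataloader batch, unpacked positionally. A hedged
sketch; the batch order (image, threshold_map, threshold_mask, shrink_map,
shrink_mask) is inferred from the labels[1:] unpacking above:

    import paddle
    from ppocr.modeling.losses.det_db_loss import DBLoss

    db_loss = DBLoss(balance_loss=True, main_loss_type='DiceLoss',
                     alpha=5, beta=10, ohem_ratio=3)
    predicts = paddle.rand([2, 3, 640, 640])  # shrink/threshold/binary maps
    batch = [None] + [paddle.rand([2, 640, 640]) for _ in range(4)]
    losses = db_loss(predicts, batch)  # {'loss': ..., 'loss_shrink_maps': ...}
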
diff --git a/ppocr/modeling/losses/det_east_loss.py b/ppocr/modeling/losses/det_east_loss.py
deleted file mode 100755
index 2019298ac34081b5a7e8623a900eaa8f3e02c45a..0000000000000000000000000000000000000000
--- a/ppocr/modeling/losses/det_east_loss.py
+++ /dev/null
@@ -1,61 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle.fluid as fluid
-
-
-class EASTLoss(object):
- """
- EAST Loss function
- """
-
- def __init__(self, params=None):
- super(EASTLoss, self).__init__()
-
- def __call__(self, predicts, labels):
- f_score = predicts['f_score']
- f_geo = predicts['f_geo']
- l_score = labels['score']
- l_geo = labels['geo']
- l_mask = labels['mask']
- ##dice_loss
- intersection = fluid.layers.reduce_sum(f_score * l_score * l_mask)
- union = fluid.layers.reduce_sum(f_score * l_mask)\
- + fluid.layers.reduce_sum(l_score * l_mask)
- dice_loss = 1 - 2 * intersection / (union + 1e-5)
- #smoooth_l1_loss
- channels = 8
- l_geo_split = fluid.layers.split(
- l_geo, num_or_sections=channels + 1, dim=1)
- f_geo_split = fluid.layers.split(f_geo, num_or_sections=channels, dim=1)
- smooth_l1 = 0
- for i in range(0, channels):
- geo_diff = l_geo_split[i] - f_geo_split[i]
- abs_geo_diff = fluid.layers.abs(geo_diff)
- smooth_l1_sign = fluid.layers.less_than(abs_geo_diff, l_score)
- smooth_l1_sign = fluid.layers.cast(smooth_l1_sign, dtype='float32')
- in_loss = abs_geo_diff * abs_geo_diff * smooth_l1_sign + \
- (abs_geo_diff - 0.5) * (1.0 - smooth_l1_sign)
- out_loss = l_geo_split[-1] / channels * in_loss * l_score
- smooth_l1 += out_loss
- smooth_l1_loss = fluid.layers.reduce_mean(smooth_l1 * l_score)
- dice_loss = dice_loss * 0.01
- total_loss = dice_loss + smooth_l1_loss
- losses = {'total_loss':total_loss, "dice_loss":dice_loss,\
- "smooth_l1_loss":smooth_l1_loss}
- return losses
diff --git a/ppocr/modeling/losses/det_sast_loss.py b/ppocr/modeling/losses/det_sast_loss.py
deleted file mode 100644
index fb1a545af9d67b61878b3fb476fdc81746e8b992..0000000000000000000000000000000000000000
--- a/ppocr/modeling/losses/det_sast_loss.py
+++ /dev/null
@@ -1,115 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle.fluid as fluid
-
-
-class SASTLoss(object):
- """
- SAST Loss function
- """
-
- def __init__(self, params=None):
- super(SASTLoss, self).__init__()
-
- def __call__(self, predicts, labels):
- """
- tcl_pos: N x 128 x 3
- tcl_mask: N x 128 x 1
- tcl_label: N x X list or LoDTensor
- """
-
- f_score = predicts['f_score']
- f_border = predicts['f_border']
- f_tvo = predicts['f_tvo']
- f_tco = predicts['f_tco']
-
- l_score = labels['input_score']
- l_border = labels['input_border']
- l_mask = labels['input_mask']
- l_tvo = labels['input_tvo']
- l_tco = labels['input_tco']
-
- #score_loss
- intersection = fluid.layers.reduce_sum(f_score * l_score * l_mask)
- union = fluid.layers.reduce_sum(f_score * l_mask) + fluid.layers.reduce_sum(l_score * l_mask)
- score_loss = 1.0 - 2 * intersection / (union + 1e-5)
-
- #border loss
- l_border_split, l_border_norm = fluid.layers.split(l_border, num_or_sections=[4, 1], dim=1)
- f_border_split = f_border
- l_border_norm_split = fluid.layers.expand(x=l_border_norm, expand_times=[1, 4, 1, 1])
- l_border_score = fluid.layers.expand(x=l_score, expand_times=[1, 4, 1, 1])
- l_border_mask = fluid.layers.expand(x=l_mask, expand_times=[1, 4, 1, 1])
- border_diff = l_border_split - f_border_split
- abs_border_diff = fluid.layers.abs(border_diff)
- border_sign = abs_border_diff < 1.0
- border_sign = fluid.layers.cast(border_sign, dtype='float32')
- border_sign.stop_gradient = True
- border_in_loss = 0.5 * abs_border_diff * abs_border_diff * border_sign + \
- (abs_border_diff - 0.5) * (1.0 - border_sign)
- border_out_loss = l_border_norm_split * border_in_loss
- border_loss = fluid.layers.reduce_sum(border_out_loss * l_border_score * l_border_mask) / \
- (fluid.layers.reduce_sum(l_border_score * l_border_mask) + 1e-5)
-
- #tvo_loss
- l_tvo_split, l_tvo_norm = fluid.layers.split(l_tvo, num_or_sections=[8, 1], dim=1)
- f_tvo_split = f_tvo
- l_tvo_norm_split = fluid.layers.expand(x=l_tvo_norm, expand_times=[1, 8, 1, 1])
- l_tvo_score = fluid.layers.expand(x=l_score, expand_times=[1, 8, 1, 1])
- l_tvo_mask = fluid.layers.expand(x=l_mask, expand_times=[1, 8, 1, 1])
- #
- tvo_geo_diff = l_tvo_split - f_tvo_split
- abs_tvo_geo_diff = fluid.layers.abs(tvo_geo_diff)
- tvo_sign = abs_tvo_geo_diff < 1.0
- tvo_sign = fluid.layers.cast(tvo_sign, dtype='float32')
- tvo_sign.stop_gradient = True
- tvo_in_loss = 0.5 * abs_tvo_geo_diff * abs_tvo_geo_diff * tvo_sign + \
- (abs_tvo_geo_diff - 0.5) * (1.0 - tvo_sign)
- tvo_out_loss = l_tvo_norm_split * tvo_in_loss
- tvo_loss = fluid.layers.reduce_sum(tvo_out_loss * l_tvo_score * l_tvo_mask) / \
- (fluid.layers.reduce_sum(l_tvo_score * l_tvo_mask) + 1e-5)
-
- #tco_loss
- l_tco_split, l_tco_norm = fluid.layers.split(l_tco, num_or_sections=[2, 1], dim=1)
- f_tco_split = f_tco
- l_tco_norm_split = fluid.layers.expand(x=l_tco_norm, expand_times=[1, 2, 1, 1])
- l_tco_score = fluid.layers.expand(x=l_score, expand_times=[1, 2, 1, 1])
- l_tco_mask = fluid.layers.expand(x=l_mask, expand_times=[1, 2, 1, 1])
- #
- tco_geo_diff = l_tco_split - f_tco_split
- abs_tco_geo_diff = fluid.layers.abs(tco_geo_diff)
- tco_sign = abs_tco_geo_diff < 1.0
- tco_sign = fluid.layers.cast(tco_sign, dtype='float32')
- tco_sign.stop_gradient = True
- tco_in_loss = 0.5 * abs_tco_geo_diff * abs_tco_geo_diff * tco_sign + \
- (abs_tco_geo_diff - 0.5) * (1.0 - tco_sign)
- tco_out_loss = l_tco_norm_split * tco_in_loss
- tco_loss = fluid.layers.reduce_sum(tco_out_loss * l_tco_score * l_tco_mask) / \
- (fluid.layers.reduce_sum(l_tco_score * l_tco_mask) + 1e-5)
-
-
- # total loss
- tvo_lw, tco_lw = 1.5, 1.5
- score_lw, border_lw = 1.0, 1.0
- total_loss = score_loss * score_lw + border_loss * border_lw + \
- tvo_loss * tvo_lw + tco_loss * tco_lw
-
- losses = {'total_loss':total_loss, "score_loss":score_loss,\
- "border_loss":border_loss, 'tvo_loss':tvo_loss, 'tco_loss':tco_loss}
- return losses
\ No newline at end of file
diff --git a/ppocr/modeling/losses/rec_attention_loss.py b/ppocr/modeling/losses/rec_attention_loss.py
deleted file mode 100755
index 8d8d7c1359f5f5edf79aed39092fa637a6cbde03..0000000000000000000000000000000000000000
--- a/ppocr/modeling/losses/rec_attention_loss.py
+++ /dev/null
@@ -1,38 +0,0 @@
-#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-
-import paddle
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
-import numpy as np
-
-
-class AttentionLoss(object):
- def __init__(self, params):
- super(AttentionLoss, self).__init__()
- self.char_num = params['char_num']
-
- def __call__(self, predicts, labels):
- predict = predicts['predict']
- label_out = labels['label_out']
- label_out = fluid.layers.cast(x=label_out, dtype='int64')
- cost = fluid.layers.cross_entropy(input=predict, label=label_out)
- sum_cost = fluid.layers.reduce_sum(cost)
- return sum_cost
diff --git a/ppocr/modeling/losses/rec_ctc_loss.py b/ppocr/modeling/losses/rec_ctc_loss.py
index 3552d320978f33ec3eb032c96654eb3b7886d8c0..7894bea399de040605c7ab61f095cb65d5e93add 100755
--- a/ppocr/modeling/losses/rec_ctc_loss.py
+++ b/ppocr/modeling/losses/rec_ctc_loss.py
@@ -1,36 +1,36 @@
-#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-import math
-
import paddle
-import paddle.fluid as fluid
+from paddle import nn
-class CTCLoss(object):
- def __init__(self, params):
+class CTCLoss(nn.Layer):
+ def __init__(self, **kwargs):
super(CTCLoss, self).__init__()
- self.char_num = params['char_num']
+ self.loss_func = nn.CTCLoss(blank=0, reduction='none')
- def __call__(self, predicts, labels):
- predict = predicts['predict']
- label = labels['label']
- cost = fluid.layers.warpctc(
- input=predict, label=label, blank=self.char_num, norm_by_times=True)
- sum_cost = fluid.layers.reduce_sum(cost)
- return sum_cost
+ def __call__(self, predicts, batch):
+ predicts = predicts.transpose((1, 0, 2))
+ N, B, _ = predicts.shape
+ preds_lengths = paddle.to_tensor([N] * B, dtype='int64')
+ labels = batch[1].astype("int32")
+ label_lengths = batch[2].astype('int64')
+ loss = self.loss_func(predicts, labels, preds_lengths, label_lengths)
+ loss = loss.mean()
+ return {'loss': loss}
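
The CTC loss likewise consumes the raw dataloader batch. An illustrative call;
the batch layout [image, label, label_length] is an assumption read off the
indexing above:

    import paddle
    from ppocr.modeling.losses.rec_ctc_loss import CTCLoss

    ctc = CTCLoss()
    predicts = paddle.rand([4, 25, 37])   # (B, T, num_classes); blank id is 0
    labels = paddle.randint(1, 37, shape=[4, 25])
    lengths = paddle.full([4], 10, dtype='int64')
    print(ctc(predicts, [None, labels, lengths]))  # {'loss': mean CTC loss}
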
diff --git a/ppocr/modeling/losses/rec_srn_loss.py b/ppocr/modeling/losses/rec_srn_loss.py
deleted file mode 100755
index b1ebd86fadfc48831ca3d03e79ef65f4be2aa5e8..0000000000000000000000000000000000000000
--- a/ppocr/modeling/losses/rec_srn_loss.py
+++ /dev/null
@@ -1,55 +0,0 @@
-#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-
-import paddle
-import paddle.fluid as fluid
-
-
-class SRNLoss(object):
- def __init__(self, params):
- super(SRNLoss, self).__init__()
- self.char_num = params['char_num']
-
- def __call__(self, predicts, others):
- predict = predicts['predict']
- word_predict = predicts['word_out']
- gsrm_predict = predicts['gsrm_out']
- label = others['label']
- lbl_weight = others['lbl_weight']
-
- casted_label = fluid.layers.cast(x=label, dtype='int64')
- cost_word = fluid.layers.cross_entropy(
- input=word_predict, label=casted_label)
- cost_gsrm = fluid.layers.cross_entropy(
- input=gsrm_predict, label=casted_label)
- cost_vsfd = fluid.layers.cross_entropy(
- input=predict, label=casted_label)
-
- cost_word = fluid.layers.reshape(
- x=fluid.layers.reduce_sum(cost_word), shape=[1])
- cost_gsrm = fluid.layers.reshape(
- x=fluid.layers.reduce_sum(cost_gsrm), shape=[1])
- cost_vsfd = fluid.layers.reshape(
- x=fluid.layers.reduce_sum(cost_vsfd), shape=[1])
-
- sum_cost = fluid.layers.sum(
- [cost_word, cost_vsfd * 2.0, cost_gsrm * 0.15])
-
- return [sum_cost, cost_vsfd, cost_word]
diff --git a/tools/eval_utils/__init__.py b/ppocr/modeling/necks/__init__.py
similarity index 62%
rename from tools/eval_utils/__init__.py
rename to ppocr/modeling/necks/__init__.py
index abf198b97e6e818e1fbe59006f98492640bcee54..bc7fdb79b055d438314c50be6d8748b890918262 100644
--- a/tools/eval_utils/__init__.py
+++ b/ppocr/modeling/necks/__init__.py
@@ -11,3 +11,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+
+__all__ = ['build_neck']
+
+
+def build_neck(config):
+ from .fpn import FPN
+ from .rnn import SequenceEncoder
+ support_dict = ['FPN', 'SequenceEncoder']
+
+ module_name = config.pop('name')
+ assert module_name in support_dict, Exception('neck only support {}'.format(
+ support_dict))
+ module_class = eval(module_name)(**config)
+ return module_class
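
Usage mirrors build_loss; a sketch with hypothetical channel numbers:

    from ppocr.modeling.necks import build_neck

    neck = build_neck({'name': 'FPN',
                       'in_channels': [16, 24, 56, 480],  # illustrative backbone outputs
                       'out_channels': 256})
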
diff --git a/ppocr/modeling/necks/fpn.py b/ppocr/modeling/necks/fpn.py
new file mode 100644
index 0000000000000000000000000000000000000000..09f0bf9b02e30e2735a86b0a8aad68912e0fe193
--- /dev/null
+++ b/ppocr/modeling/necks/fpn.py
@@ -0,0 +1,113 @@
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+
+
+class FPN(nn.Layer):
+ def __init__(self, in_channels, out_channels, **kwargs):
+ super(FPN, self).__init__()
+ self.out_channels = out_channels
+ weight_attr = paddle.nn.initializer.MSRA(uniform=False)
+
+ self.in2_conv = nn.Conv2d(
+ in_channels=in_channels[0],
+ out_channels=self.out_channels,
+ kernel_size=1,
+ weight_attr=ParamAttr(
+ name='conv2d_51.w_0', initializer=weight_attr),
+ bias_attr=False)
+ self.in3_conv = nn.Conv2d(
+ in_channels=in_channels[1],
+ out_channels=self.out_channels,
+ kernel_size=1,
+ weight_attr=ParamAttr(
+ name='conv2d_50.w_0', initializer=weight_attr),
+ bias_attr=False)
+ self.in4_conv = nn.Conv2d(
+ in_channels=in_channels[2],
+ out_channels=self.out_channels,
+ kernel_size=1,
+ weight_attr=ParamAttr(
+ name='conv2d_49.w_0', initializer=weight_attr),
+ bias_attr=False)
+ self.in5_conv = nn.Conv2d(
+ in_channels=in_channels[3],
+ out_channels=self.out_channels,
+ kernel_size=1,
+ weight_attr=ParamAttr(
+ name='conv2d_48.w_0', initializer=weight_attr),
+ bias_attr=False)
+ self.p5_conv = nn.Conv2d(
+ in_channels=self.out_channels,
+ out_channels=self.out_channels // 4,
+ kernel_size=3,
+ padding=1,
+ weight_attr=ParamAttr(
+ name='conv2d_52.w_0', initializer=weight_attr),
+ bias_attr=False)
+ self.p4_conv = nn.Conv2d(
+ in_channels=self.out_channels,
+ out_channels=self.out_channels // 4,
+ kernel_size=3,
+ padding=1,
+ weight_attr=ParamAttr(
+ name='conv2d_53.w_0', initializer=weight_attr),
+ bias_attr=False)
+ self.p3_conv = nn.Conv2d(
+ in_channels=self.out_channels,
+ out_channels=self.out_channels // 4,
+ kernel_size=3,
+ padding=1,
+ weight_attr=ParamAttr(
+ name='conv2d_54.w_0', initializer=weight_attr),
+ bias_attr=False)
+ self.p2_conv = nn.Conv2d(
+ in_channels=self.out_channels,
+ out_channels=self.out_channels // 4,
+ kernel_size=3,
+ padding=1,
+ weight_attr=ParamAttr(
+ name='conv2d_55.w_0', initializer=weight_attr),
+ bias_attr=False)
+
+ def forward(self, x):
+ c2, c3, c4, c5 = x
+
+ in5 = self.in5_conv(c5)
+ in4 = self.in4_conv(c4)
+ in3 = self.in3_conv(c3)
+ in2 = self.in2_conv(c2)
+
+ out4 = in4 + F.resize_nearest(in5, scale=2) # 1/16
+ out3 = in3 + F.resize_nearest(out4, scale=2) # 1/8
+ out2 = in2 + F.resize_nearest(out3, scale=2) # 1/4
+
+ p5 = self.p5_conv(in5)
+ p4 = self.p4_conv(out4)
+ p3 = self.p3_conv(out3)
+ p2 = self.p2_conv(out2)
+ p5 = F.resize_nearest(p5, scale=8)
+ p4 = F.resize_nearest(p4, scale=4)
+ p3 = F.resize_nearest(p3, scale=2)
+
+ fuse = paddle.concat([p5, p4, p3, p2], axis=1)
+ return fuse
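
A shape walk-through of the FPN above (channel counts and input sizes are
illustrative): each lateral branch emits out_channels // 4 channels, so the
final concat restores out_channels at 1/4 input resolution:

    import paddle
    from ppocr.modeling.necks.fpn import FPN

    fpn = FPN(in_channels=[16, 24, 56, 480], out_channels=256)
    c2 = paddle.rand([1, 16, 160, 160])  # 1/4 scale
    c3 = paddle.rand([1, 24, 80, 80])    # 1/8
    c4 = paddle.rand([1, 56, 40, 40])    # 1/16
    c5 = paddle.rand([1, 480, 20, 20])   # 1/32
    fuse = fpn([c2, c3, c4, c5])         # shape (1, 256, 160, 160)
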
diff --git a/ppocr/modeling/necks/rnn.py b/ppocr/modeling/necks/rnn.py
new file mode 100644
index 0000000000000000000000000000000000000000..8a744e0d67ad902cfb52cbf1a0eb02e897c61d0b
--- /dev/null
+++ b/ppocr/modeling/necks/rnn.py
@@ -0,0 +1,143 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from paddle import nn
+
+from ppocr.modeling.heads.rec_ctc_head import get_para_bias_attr
+
+
+class EncoderWithReshape(nn.Layer):
+ def __init__(self, in_channels, **kwargs):
+ super().__init__()
+ self.out_channels = in_channels
+
+ def forward(self, x):
+ B, C, H, W = x.shape
+ x = x.reshape((B, C, -1))
+ x = x.transpose([0, 2, 1]) # (NTC)(batch, width, channels)
+ return x
+
+
+class Im2Seq(nn.Layer):
+ def __init__(self, in_channels, **kwargs):
+ super().__init__()
+ self.out_channels = in_channels
+
+ def forward(self, x):
+ B, C, H, W = x.shape
+ assert H == 1
+ x = x.transpose((0, 2, 3, 1))
+ x = x.reshape((-1, C))
+ return x
+
+
+class EncoderWithRNN(nn.Layer):
+ def __init__(self, in_channels, hidden_size):
+ super(EncoderWithRNN, self).__init__()
+ self.out_channels = hidden_size * 2
+ # self.lstm1_fw = nn.LSTMCell(
+ # in_channels,
+ # hidden_size,
+ # weight_ih_attr=ParamAttr(name='lstm_st1_fc1_w'),
+ # bias_ih_attr=ParamAttr(name='lstm_st1_fc1_b'),
+ # weight_hh_attr=ParamAttr(name='lstm_st1_out1_w'),
+ # bias_hh_attr=ParamAttr(name='lstm_st1_out1_b'),
+ # )
+ # self.lstm1_bw = nn.LSTMCell(
+ # in_channels,
+ # hidden_size,
+ # weight_ih_attr=ParamAttr(name='lstm_st1_fc2_w'),
+ # bias_ih_attr=ParamAttr(name='lstm_st1_fc2_b'),
+ # weight_hh_attr=ParamAttr(name='lstm_st1_out2_w'),
+ # bias_hh_attr=ParamAttr(name='lstm_st1_out2_b'),
+ # )
+ # self.lstm2_fw = nn.LSTMCell(
+ # hidden_size,
+ # hidden_size,
+ # weight_ih_attr=ParamAttr(name='lstm_st2_fc1_w'),
+ # bias_ih_attr=ParamAttr(name='lstm_st2_fc1_b'),
+ # weight_hh_attr=ParamAttr(name='lstm_st2_out1_w'),
+ # bias_hh_attr=ParamAttr(name='lstm_st2_out1_b'),
+ # )
+ # self.lstm2_bw = nn.LSTMCell(
+ # hidden_size,
+ # hidden_size,
+ # weight_ih_attr=ParamAttr(name='lstm_st2_fc2_w'),
+ # bias_ih_attr=ParamAttr(name='lstm_st2_fc2_b'),
+ # weight_hh_attr=ParamAttr(name='lstm_st2_out2_w'),
+ # bias_hh_attr=ParamAttr(name='lstm_st2_out2_b'),
+ # )
+ self.lstm = nn.LSTM(
+ in_channels, hidden_size, direction='bidirectional', num_layers=2)
+
+ def forward(self, x):
+ # fw_x, _ = self.lstm1_fw(x)
+ # fw_x, _ = self.lstm2_fw(fw_x)
+ #
+ # # bw
+ # bw_x, _ = self.lstm1_bw(x)
+ # bw_x, _ = self.lstm2_bw(bw_x)
+ # x = paddle.concat([fw_x, bw_x], axis=2)
+ x, _ = self.lstm(x)
+ return x
+
+
+class EncoderWithFC(nn.Layer):
+ def __init__(self, in_channels, hidden_size):
+ super(EncoderWithFC, self).__init__()
+ self.out_channels = hidden_size
+ weight_attr, bias_attr = get_para_bias_attr(
+ l2_decay=0.00001, k=in_channels, name='reduce_encoder_fea')
+ self.fc = nn.Linear(
+ in_channels,
+ hidden_size,
+ weight_attr=weight_attr,
+ bias_attr=bias_attr,
+ name='reduce_encoder_fea')
+
+ def forward(self, x):
+ x = self.fc(x)
+ return x
+
+
+class SequenceEncoder(nn.Layer):
+ def __init__(self, in_channels, encoder_type, hidden_size, **kwargs):
+ super(SequenceEncoder, self).__init__()
+ self.encoder_reshape = EncoderWithReshape(in_channels)
+ self.out_channels = self.encoder_reshape.out_channels
+ if encoder_type == 'reshape':
+ self.only_reshape = True
+ else:
+ support_encoder_dict = {
+ 'reshape': EncoderWithReshape,
+ 'fc': EncoderWithFC,
+ 'rnn': EncoderWithRNN
+ }
+ assert encoder_type in support_encoder_dict, '{} must in {}'.format(
+ encoder_type, support_encoder_dict.keys())
+
+ self.encoder = support_encoder_dict[encoder_type](
+ self.encoder_reshape.out_channels, hidden_size)
+ self.out_channels = self.encoder.out_channels
+ self.only_reshape = False
+
+ def forward(self, x):
+ x = self.encoder_reshape(x)
+ if not self.only_reshape:
+ x = self.encoder(x)
+ return x
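
Sketch of the rec-branch neck (in_channels=288 is illustrative; with
encoder_type='rnn' the bidirectional LSTM doubles hidden_size on the channel
axis):

    import paddle
    from ppocr.modeling.necks.rnn import SequenceEncoder

    encoder = SequenceEncoder(in_channels=288, encoder_type='rnn', hidden_size=48)
    feat = paddle.rand([4, 288, 1, 80])  # CRNN-style map, height pooled to 1
    seq = encoder(feat)                  # (4, 80, 96) = (B, W, hidden_size * 2)
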
diff --git a/ppocr/modeling/stns/tps.py b/ppocr/modeling/stns/tps.py
deleted file mode 100755
index 24c6448d2dc85442e85aff977727ecd4af6a439e..0000000000000000000000000000000000000000
--- a/ppocr/modeling/stns/tps.py
+++ /dev/null
@@ -1,261 +0,0 @@
-#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-
-import paddle.fluid as fluid
-import paddle.fluid.layers as layers
-from paddle.fluid.param_attr import ParamAttr
-import numpy as np
-
-
-class LocalizationNetwork(object):
- def __init__(self, params):
- super(LocalizationNetwork, self).__init__()
- self.F = params['num_fiducial']
- self.loc_lr = params['loc_lr']
- self.model_name = params['model_name']
-
- def conv_bn_layer(self,
- input,
- num_filters,
- filter_size,
- stride=1,
- groups=1,
- act=None,
- name=None):
- conv = layers.conv2d(
- input=input,
- num_filters=num_filters,
- filter_size=filter_size,
- stride=stride,
- padding=(filter_size - 1) // 2,
- groups=groups,
- act=None,
- param_attr=ParamAttr(name=name + "_weights"),
- bias_attr=False)
- bn_name = "bn_" + name
- return layers.batch_norm(
- input=conv,
- act=act,
- param_attr=ParamAttr(name=bn_name + '_scale'),
- bias_attr=ParamAttr(bn_name + '_offset'),
- moving_mean_name=bn_name + '_mean',
- moving_variance_name=bn_name + '_variance')
-
- def get_initial_fiducials(self):
- """ see RARE paper Fig. 6 (a) """
- F = self.F
- ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2))
- ctrl_pts_y_top = np.linspace(0.0, -1.0, num=int(F / 2))
- ctrl_pts_y_bottom = np.linspace(1.0, 0.0, num=int(F / 2))
- ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
- ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
- initial_bias = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
- return initial_bias
-
- def __call__(self, image):
- F = self.F
- loc_lr = self.loc_lr
- if self.model_name == "large":
- num_filters_list = [64, 128, 256, 512]
- fc_dim = 256
- else:
- num_filters_list = [16, 32, 64, 128]
- fc_dim = 64
- for fno in range(len(num_filters_list)):
- num_filters = num_filters_list[fno]
- name = "loc_conv%d" % fno
- if fno == 0:
- conv = self.conv_bn_layer(
- image, num_filters, 3, act='relu', name=name)
- else:
- conv = self.conv_bn_layer(
- pool, num_filters, 3, act='relu', name=name)
-
- if fno == len(num_filters_list) - 1:
- pool = layers.adaptive_pool2d(
- input=conv, pool_size=[1, 1], pool_type='avg')
- else:
- pool = layers.pool2d(
- input=conv,
- pool_size=2,
- pool_stride=2,
- pool_padding=0,
- pool_type='max')
- name = "loc_fc1"
- stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
- fc1 = layers.fc(input=pool,
- size=fc_dim,
- param_attr=fluid.param_attr.ParamAttr(
- learning_rate=loc_lr,
- initializer=fluid.initializer.Uniform(-stdv, stdv),
- name=name + "_w"),
- act='relu',
- name=name)
-
- initial_bias = self.get_initial_fiducials()
- initial_bias = initial_bias.reshape(-1)
- name = "loc_fc2"
- param_attr = fluid.param_attr.ParamAttr(
- learning_rate=loc_lr,
- initializer=fluid.initializer.NumpyArrayInitializer(
- np.zeros([fc_dim, F * 2])),
- name=name + "_w")
- bias_attr = fluid.param_attr.ParamAttr(
- learning_rate=loc_lr,
- initializer=fluid.initializer.NumpyArrayInitializer(initial_bias),
- name=name + "_b")
- fc2 = layers.fc(input=fc1,
- size=F * 2,
- param_attr=param_attr,
- bias_attr=bias_attr,
- name=name)
- batch_C_prime = layers.reshape(x=fc2, shape=[-1, F, 2], inplace=False)
- return batch_C_prime
-
-
-class GridGenerator(object):
- def __init__(self, params):
- super(GridGenerator, self).__init__()
- self.eps = 1e-6
- self.F = params['num_fiducial']
-
- def build_C(self):
- """ Return coordinates of fiducial points in I_r; C """
- F = self.F
- ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2))
- ctrl_pts_y_top = -1 * np.ones(int(F / 2))
- ctrl_pts_y_bottom = np.ones(int(F / 2))
- ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
- ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
- C = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
- return C # F x 2
-
- def build_P(self, I_r_size):
- I_r_width, I_r_height = I_r_size
- I_r_grid_x = (np.arange(-I_r_width, I_r_width, 2) + 1.0)\
- / I_r_width # self.I_r_width
- I_r_grid_y = (np.arange(-I_r_height, I_r_height, 2) + 1.0)\
- / I_r_height # self.I_r_height
- # P: self.I_r_width x self.I_r_height x 2
- P = np.stack(np.meshgrid(I_r_grid_x, I_r_grid_y), axis=2)
- # n (= self.I_r_width x self.I_r_height) x 2
- return P.reshape([-1, 2])
-
- def build_inv_delta_C(self, C):
- """ Return inv_delta_C which is needed to calculate T """
- F = self.F
- hat_C = np.zeros((F, F), dtype=float) # F x F
- for i in range(0, F):
- for j in range(i, F):
- r = np.linalg.norm(C[i] - C[j])
- hat_C[i, j] = r
- hat_C[j, i] = r
- np.fill_diagonal(hat_C, 1)
- hat_C = (hat_C**2) * np.log(hat_C)
- # print(C.shape, hat_C.shape)
- delta_C = np.concatenate( # F+3 x F+3
- [
- np.concatenate(
- [np.ones((F, 1)), C, hat_C], axis=1), # F x F+3
- np.concatenate(
- [np.zeros((2, 3)), np.transpose(C)], axis=1), # 2 x F+3
- np.concatenate(
- [np.zeros((1, 3)), np.ones((1, F))], axis=1) # 1 x F+3
- ],
- axis=0)
- inv_delta_C = np.linalg.inv(delta_C)
- return inv_delta_C # F+3 x F+3
-
- def build_P_hat(self, C, P):
- F = self.F
- eps = self.eps
- n = P.shape[0] # n (= self.I_r_width x self.I_r_height)
- #P_tile: n x 2 -> n x 1 x 2 -> n x F x 2
- P_tile = np.tile(np.expand_dims(P, axis=1), (1, F, 1))
- C_tile = np.expand_dims(C, axis=0) # 1 x F x 2
- P_diff = P_tile - C_tile # n x F x 2
- #rbf_norm: n x F
- rbf_norm = np.linalg.norm(P_diff, ord=2, axis=2, keepdims=False)
- #rbf: n x F
- rbf = np.multiply(np.square(rbf_norm), np.log(rbf_norm + eps))
- P_hat = np.concatenate([np.ones((n, 1)), P, rbf], axis=1)
- return P_hat # n x F+3
-
- def get_expand_tensor(self, batch_C_prime):
- name = "ex_fc"
- initializer = fluid.initializer.ConstantInitializer(value=0.0)
- param_attr = fluid.param_attr.ParamAttr(
- learning_rate=0.0, initializer=initializer, name=name + "_w")
- bias_attr = fluid.param_attr.ParamAttr(
- learning_rate=0.0, initializer=initializer, name=name + "_b")
- batch_C_ex_part_tensor = fluid.layers.fc(input=batch_C_prime,
- size=6,
- param_attr=param_attr,
- bias_attr=bias_attr,
- name=name)
- batch_C_ex_part_tensor = fluid.layers.reshape(
- x=batch_C_ex_part_tensor, shape=[-1, 3, 2])
- return batch_C_ex_part_tensor
-
- def __call__(self, batch_C_prime, I_r_size):
- C = self.build_C()
- P = self.build_P(I_r_size)
- inv_delta_C = self.build_inv_delta_C(C).astype('float32')
- P_hat = self.build_P_hat(C, P).astype('float32')
-
- inv_delta_C_tensor = layers.create_tensor(dtype='float32')
- layers.assign(inv_delta_C, inv_delta_C_tensor)
- inv_delta_C_tensor.stop_gradient = True
- P_hat_tensor = layers.create_tensor(dtype='float32')
- layers.assign(P_hat, P_hat_tensor)
- P_hat_tensor.stop_gradient = True
-
- batch_C_ex_part_tensor = self.get_expand_tensor(batch_C_prime)
- # batch_C_ex_part_tensor = create_tmp_var(
- # fluid.default_main_program(),
- # name='batch_C_ex_part_tensor',
- # dtype='float32', shape=[-1, 3, 2])
- # layers.py_func(func=get_batch_C_expand,
- # x=[batch_C_prime], out=[batch_C_ex_part_tensor])
-
- batch_C_ex_part_tensor.stop_gradient = True
-
- batch_C_prime_with_zeros = layers.concat(
- [batch_C_prime, batch_C_ex_part_tensor], axis=1)
- batch_T = layers.matmul(inv_delta_C_tensor, batch_C_prime_with_zeros)
- batch_P_prime = layers.matmul(P_hat_tensor, batch_T)
- return batch_P_prime
-
-
-class TPS(object):
- def __init__(self, params):
- super(TPS, self).__init__()
- self.loc_net = LocalizationNetwork(params)
- self.grid_generator = GridGenerator(params)
-
- def __call__(self, image):
- batch_C_prime = self.loc_net(image)
- I_r_size = [image.shape[3], image.shape[2]]
- batch_P_prime = self.grid_generator(batch_C_prime, I_r_size)
- batch_P_prime = layers.reshape(
- x=batch_P_prime, shape=[-1, image.shape[2], image.shape[3], 2])
- batch_I_r = layers.grid_sampler(x=image, grid=batch_P_prime)
- image.stop_gradient = False
- return batch_I_r
diff --git a/ppocr/modeling/stns/__init__.py b/ppocr/modeling/transform/__init__.py
similarity index 66%
rename from ppocr/modeling/stns/__init__.py
rename to ppocr/modeling/transform/__init__.py
index abf198b97e6e818e1fbe59006f98492640bcee54..af3b3f869759e9fa053e514628bdcd0d0d452c5c 100755
--- a/ppocr/modeling/stns/__init__.py
+++ b/ppocr/modeling/transform/__init__.py
@@ -11,3 +11,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+
+__all__ = ['build_transform']
+
+
+def build_transform(config):
+ support_dict = ['']
+
+ module_name = config.pop('name')
+ assert module_name in support_dict, Exception(
+ 'transform only support {}'.format(support_dict))
+ module_class = eval(module_name)(**config)
+ return module_class
diff --git a/ppocr/optimizer.py b/ppocr/optimizer.py
deleted file mode 100644
index fd315cd1319d4925e893705957a42f931a39076e..0000000000000000000000000000000000000000
--- a/ppocr/optimizer.py
+++ /dev/null
@@ -1,155 +0,0 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import math
-import paddle.fluid as fluid
-from paddle.fluid.regularizer import L2Decay
-from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
-import paddle.fluid.layers.ops as ops
-
-from ppocr.utils.utility import initial_logger
-
-logger = initial_logger()
-
-
-def cosine_decay_with_warmup(learning_rate,
- step_each_epoch,
- epochs=500,
- warmup_minibatch=1000):
- """Applies cosine decay to the learning rate.
- lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)
- decrease lr for every mini-batch and start with warmup.
- """
- global_step = _decay_step_counter()
- lr = fluid.layers.tensor.create_global_var(
- shape=[1],
- value=0.0,
- dtype='float32',
- persistable=True,
- name="learning_rate")
-
- warmup_minibatch = fluid.layers.fill_constant(
- shape=[1],
- dtype='float32',
- value=float(warmup_minibatch),
- force_cpu=True)
-
- with fluid.layers.control_flow.Switch() as switch:
- with switch.case(global_step < warmup_minibatch):
- decayed_lr = learning_rate * (1.0 * global_step / warmup_minibatch)
- fluid.layers.tensor.assign(input=decayed_lr, output=lr)
- with switch.default():
- decayed_lr = learning_rate * \
- (ops.cos((global_step - warmup_minibatch) * (math.pi / (epochs * step_each_epoch))) + 1)/2
- fluid.layers.tensor.assign(input=decayed_lr, output=lr)
- return lr
-
-
-def AdamDecay(params, parameter_list=None):
- """
- define optimizer function
- args:
- params(dict): the super parameters
- parameter_list (list): list of Variable names to update to minimize loss
- return:
- """
- base_lr = params['base_lr']
- beta1 = params['beta1']
- beta2 = params['beta2']
- l2_decay = params.get("l2_decay", 0.0)
-
- if 'decay' in params:
- supported_decay_mode = [
- "cosine_decay", "cosine_decay_warmup", "piecewise_decay"
- ]
- params = params['decay']
- decay_mode = params['function']
- assert decay_mode in supported_decay_mode, "Supported decay mode is {}, but got {}".format(
- supported_decay_mode, decay_mode)
-
- if decay_mode == "cosine_decay":
- step_each_epoch = params['step_each_epoch']
- total_epoch = params['total_epoch']
- base_lr = fluid.layers.cosine_decay(
- learning_rate=base_lr,
- step_each_epoch=step_each_epoch,
- epochs=total_epoch)
- elif decay_mode == "cosine_decay_warmup":
- step_each_epoch = params['step_each_epoch']
- total_epoch = params['total_epoch']
- warmup_minibatch = params.get("warmup_minibatch", 1000)
- base_lr = cosine_decay_with_warmup(
- learning_rate=base_lr,
- step_each_epoch=step_each_epoch,
- epochs=total_epoch,
- warmup_minibatch=warmup_minibatch)
- elif decay_mode == "piecewise_decay":
- boundaries = params["boundaries"]
- decay_rate = params["decay_rate"]
- values = [
- base_lr * decay_rate**idx
- for idx in range(len(boundaries) + 1)
- ]
- base_lr = fluid.layers.piecewise_decay(boundaries, values)
-
- optimizer = fluid.optimizer.Adam(
- learning_rate=base_lr,
- beta1=beta1,
- beta2=beta2,
- regularization=L2Decay(regularization_coeff=l2_decay),
- parameter_list=parameter_list)
- return optimizer
-
-
-def RMSProp(params, parameter_list=None):
- """
- define optimizer function
- args:
- params(dict): the super parameters
- parameter_list (list): list of Variable names to update to minimize loss
- return:
- """
- base_lr = params.get("base_lr", 0.001)
- l2_decay = params.get("l2_decay", 0.00005)
-
- if 'decay' in params:
- supported_decay_mode = ["cosine_decay", "piecewise_decay"]
- params = params['decay']
- decay_mode = params['function']
- assert decay_mode in supported_decay_mode, "Supported decay mode is {}, but got {}".format(
- supported_decay_mode, decay_mode)
-
- if decay_mode == "cosine_decay":
- step_each_epoch = params['step_each_epoch']
- total_epoch = params['total_epoch']
- base_lr = fluid.layers.cosine_decay(
- learning_rate=base_lr,
- step_each_epoch=step_each_epoch,
- epochs=total_epoch)
- elif decay_mode == "piecewise_decay":
- boundaries = params["boundaries"]
- decay_rate = params["decay_rate"]
- values = [
- base_lr * decay_rate**idx
- for idx in range(len(boundaries) + 1)
- ]
- base_lr = fluid.layers.piecewise_decay(boundaries, values)
-
- optimizer = fluid.optimizer.RMSProp(
- learning_rate=base_lr,
- regularization=fluid.regularizer.L2Decay(regularization_coeff=l2_decay))
-
- return optimizer
diff --git a/ppocr/optimizer/__init__.py b/ppocr/optimizer/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a924f2668779b78bc69524f11406364dc2b99279
--- /dev/null
+++ b/ppocr/optimizer/__init__.py
@@ -0,0 +1,56 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import copy
+
+__all__ = ['build_optimizer']
+
+
+def build_lr_scheduler(lr_config, epochs, step_each_epoch):
+ from . import learning_rate
+ lr_config.update({'epochs': epochs, 'step_each_epoch': step_each_epoch})
+ if 'name' in lr_config:
+ lr_name = lr_config.pop('name')
+ lr = getattr(learning_rate, lr_name)(**lr_config)()
+ else:
+ lr = lr_config['lr']
+ return lr
+
+
+def build_optimizer(config, epochs, step_each_epoch, parameters):
+ from . import regularizer, optimizer
+ config = copy.deepcopy(config)
+ # step1 build lr
+ lr = build_lr_scheduler(
+ config.pop('learning_rate'), epochs, step_each_epoch)
+
+ # step2 build regularization
+ if 'regularizer' in config and config['regularizer'] is not None:
+ reg_config = config.pop('regularizer')
+ reg_name = reg_config.pop('name') + 'Decay'
+ reg = getattr(regularizer, reg_name)(**reg_config)()
+ else:
+ reg = None
+
+ # step3 build optimizer
+ optim_name = config.pop('name')
+ optim = getattr(optimizer, optim_name)(learning_rate=lr,
+                                           weight_decay=reg,
+ **config)
+ return optim(parameters), lr
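
For reference, a hypothetical config exercising all three steps; key names
follow the pops above, and the regularizer 'factor' argument is an assumption
about the new regularizer module, which is not part of this hunk:

    optim_config = {
        'name': 'Adam',
        'beta1': 0.9,
        'beta2': 0.999,
        'learning_rate': {'name': 'Cosine', 'lr': 0.001, 'warmup_epoch': 2},
        'regularizer': {'name': 'L2', 'factor': 1e-5},
    }
    # optim, lr = build_optimizer(optim_config, epochs=1200,
    #                             step_each_epoch=100,
    #                             parameters=model.parameters())
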
diff --git a/ppocr/optimizer/learning_rate.py b/ppocr/optimizer/learning_rate.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b86e846a59455c178d91c65ca00d4e67010f1a3
--- /dev/null
+++ b/ppocr/optimizer/learning_rate.py
@@ -0,0 +1,183 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from paddle.optimizer import lr_scheduler
+
+
+class Linear(object):
+ """
+ Linear learning rate decay
+ Args:
+ lr (float): The initial learning rate. It is a python float number.
+ epochs(int): The decay step size. It determines the decay cycle.
+        end_lr(float, optional): The minimum final learning rate. Default: 0.0.
+ power(float, optional): Power of polynomial. Default: 1.0.
+ last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+ """
+
+ def __init__(self,
+ lr,
+ epochs,
+ step_each_epoch,
+ end_lr=0.0,
+ power=1.0,
+ warmup_epoch=0,
+ last_epoch=-1,
+ **kwargs):
+ super(Linear, self).__init__()
+ self.lr = lr
+ self.epochs = epochs * step_each_epoch
+ self.end_lr = end_lr
+ self.power = power
+ self.last_epoch = last_epoch
+ self.warmup_epoch = warmup_epoch * step_each_epoch
+
+ def __call__(self):
+ learning_rate = lr_scheduler.PolynomialLR(
+ learning_rate=self.lr,
+ decay_steps=self.epochs,
+ end_lr=self.end_lr,
+ power=self.power,
+ last_epoch=self.last_epoch)
+ if self.warmup_epoch > 0:
+ learning_rate = lr_scheduler.LinearLrWarmup(
+ learning_rate=learning_rate,
+ warmup_steps=self.warmup_epoch,
+ start_lr=0.0,
+ end_lr=self.lr,
+ last_epoch=self.last_epoch)
+ return learning_rate
+
+
+class Cosine(object):
+ """
+ Cosine learning rate decay
+ lr = 0.05 * (math.cos(epoch * (math.pi / epochs)) + 1)
+ Args:
+ lr(float): initial learning rate
+ step_each_epoch(int): steps each epoch
+ epochs(int): total training epochs
+ last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+ """
+
+ def __init__(self,
+ lr,
+ step_each_epoch,
+ epochs,
+ warmup_epoch=0,
+ last_epoch=-1,
+ **kwargs):
+ super(Cosine, self).__init__()
+ self.lr = lr
+ self.T_max = step_each_epoch * epochs
+ self.last_epoch = last_epoch
+ self.warmup_epoch = warmup_epoch * step_each_epoch
+
+ def __call__(self):
+ learning_rate = lr_scheduler.CosineAnnealingLR(
+ learning_rate=self.lr, T_max=self.T_max, last_epoch=self.last_epoch)
+ if self.warmup_epoch > 0:
+ learning_rate = lr_scheduler.LinearLrWarmup(
+ learning_rate=learning_rate,
+ warmup_steps=self.warmup_epoch,
+ start_lr=0.0,
+ end_lr=self.lr,
+ last_epoch=self.last_epoch)
+ return learning_rate
+
+
+class Step(object):
+ """
+    Step learning rate decay
+ Args:
+ step_each_epoch(int): steps each epoch
+ learning_rate (float): The initial learning rate. It is a python float number.
+ step_size (int): the interval to update.
+ gamma (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * gamma`` .
+ It should be less than 1.0. Default: 0.1.
+ last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+ """
+
+ def __init__(self,
+ lr,
+ step_size,
+ step_each_epoch,
+ gamma,
+ warmup_epoch=0,
+ last_epoch=-1,
+ **kwargs):
+ super(Step, self).__init__()
+ self.step_size = step_each_epoch * step_size
+ self.lr = lr
+ self.gamma = gamma
+ self.last_epoch = last_epoch
+ self.warmup_epoch = warmup_epoch * step_each_epoch
+
+ def __call__(self):
+ learning_rate = lr_scheduler.StepLR(
+ learning_rate=self.lr,
+ step_size=self.step_size,
+ gamma=self.gamma,
+ last_epoch=self.last_epoch)
+ if self.warmup_epoch > 0:
+ learning_rate = lr_scheduler.LinearLrWarmup(
+ learning_rate=learning_rate,
+ warmup_steps=self.warmup_epoch,
+ start_lr=0.0,
+ end_lr=self.lr,
+ last_epoch=self.last_epoch)
+ return learning_rate
+
+
+class Piecewise(object):
+ """
+ Piecewise learning rate decay
+ Args:
+ boundaries(list): A list of steps numbers. The type of element in the list is python int.
+ values(list): A list of learning rate values that will be picked during different epoch boundaries.
+ The type of element in the list is python float.
+ last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+ """
+
+ def __init__(self,
+ step_each_epoch,
+ decay_epochs,
+ values,
+ warmup_epoch=0,
+ last_epoch=-1,
+ **kwargs):
+ super(Piecewise, self).__init__()
+ self.boundaries = [step_each_epoch * e for e in decay_epochs]
+ self.values = values
+ self.last_epoch = last_epoch
+ self.warmup_epoch = warmup_epoch * step_each_epoch
+
+ def __call__(self):
+ learning_rate = lr_scheduler.PiecewiseLR(
+ boundaries=self.boundaries,
+ values=self.values,
+ last_epoch=self.last_epoch)
+ if self.warmup_epoch > 0:
+ learning_rate = lr_scheduler.LinearLrWarmup(
+ learning_rate=learning_rate,
+ warmup_steps=self.warmup_epoch,
+ start_lr=0.0,
+ end_lr=self.values[0],
+ last_epoch=self.last_epoch)
+ return learning_rate
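
For reference, a minimal usage sketch of the scheduler wrappers above (assuming a paddle>=2.0rc environment and that these classes are importable from ppocr.optimizer.learning_rate; the numbers are illustrative):

from ppocr.optimizer.learning_rate import Cosine

# 100 steps per epoch, 10 epochs, 1 warmup epoch: the lr ramps from 0 to
# 0.001 over the first 100 iterations, then follows cosine annealing.
scheduler = Cosine(lr=0.001, step_each_epoch=100, epochs=10, warmup_epoch=1)()

for step in range(3):
    print(step, scheduler.get_lr())  # current learning rate
    scheduler.step()                 # advance one iteration
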
diff --git a/ppocr/optimizer/optimizer.py b/ppocr/optimizer/optimizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..b378a3059bc1a3daa15f8b31bcf28e4d67fb7774
--- /dev/null
+++ b/ppocr/optimizer/optimizer.py
@@ -0,0 +1,119 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from paddle import optimizer as optim
+
+
+class Momentum(object):
+ """
+ Simple Momentum optimizer with velocity state.
+ Args:
+ learning_rate (float|Variable) - The learning rate used to update parameters.
+ Can be a float value or a Variable with one float value as data element.
+ momentum (float) - Momentum factor.
+ weight_decay (float|WeightDecayRegularizer, optional) - The weight decay (regularization) strategy.
+ """
+
+ def __init__(self, learning_rate, momentum, weight_decay=None, **args):
+ super(Momentum, self).__init__()
+ self.learning_rate = learning_rate
+ self.momentum = momentum
+ self.weight_decay = weight_decay
+
+ def __call__(self, parameters):
+ opt = optim.Momentum(
+ learning_rate=self.learning_rate,
+ momentum=self.momentum,
+ weight_decay=self.weight_decay,
+ parameters=parameters)
+ return opt
+
+
+class Adam(object):
+ def __init__(self,
+ learning_rate=0.001,
+ beta1=0.9,
+ beta2=0.999,
+ epsilon=1e-08,
+ parameter_list=None,
+ weight_decay=None,
+ grad_clip=None,
+ name=None,
+ lazy_mode=False,
+ **kwargs):
+ self.learning_rate = learning_rate
+ self.beta1 = beta1
+ self.beta2 = beta2
+ self.epsilon = epsilon
+ self.parameter_list = parameter_list
+ self.weight_decay = weight_decay
+ self.grad_clip = grad_clip
+ self.name = name
+ self.lazy_mode = lazy_mode
+
+ def __call__(self, parameters):
+ opt = optim.Adam(
+ learning_rate=self.learning_rate,
+ beta1=self.beta1,
+ beta2=self.beta2,
+ epsilon=self.epsilon,
+ weight_decay=self.weight_decay,
+ grad_clip=self.grad_clip,
+ name=self.name,
+ lazy_mode=self.lazy_mode,
+ parameters=parameters)
+ return opt
+
+
+class RMSProp(object):
+ """
+ Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning rate method.
+ Args:
+ learning_rate (float|Variable) - The learning rate used to update parameters.
+ Can be a float value or a Variable with one float value as data element.
+ momentum (float) - Momentum factor.
+ rho (float) - the smoothing constant for the moving average of squared gradients.
+ epsilon (float) - avoids division by zero, default is 1e-6.
+ weight_decay (float|WeightDecayRegularizer, optional) - The weight decay (regularization) strategy.
+ """
+
+ def __init__(self,
+ learning_rate,
+ momentum,
+ rho=0.95,
+ epsilon=1e-6,
+ weight_decay=None,
+ **args):
+ super(RMSProp, self).__init__()
+ self.learning_rate = learning_rate
+ self.momentum = momentum
+ self.rho = rho
+ self.epsilon = epsilon
+ self.weight_decay = weight_decay
+
+ def __call__(self, parameters):
+ opt = optim.RMSProp(
+ learning_rate=self.learning_rate,
+ momentum=self.momentum,
+ rho=self.rho,
+ epsilon=self.epsilon,
+ weight_decay=self.weight_decay,
+ parameters=parameters)
+ return opt
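
A sketch of how these optimizer wrappers compose with the schedulers (the toy model is illustrative; assumes a paddle>=2.0rc environment):

import paddle
from ppocr.optimizer.learning_rate import Piecewise
from ppocr.optimizer.optimizer import Adam

model = paddle.nn.Linear(10, 2)  # stand-in for a real detection model
# Two boundaries need three values: 1e-3 until epoch 3, 1e-4 until epoch 6, then 1e-5.
lr = Piecewise(step_each_epoch=100, decay_epochs=[3, 6],
               values=[1e-3, 1e-4, 1e-5])()
opt = Adam(learning_rate=lr)(model.parameters())
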
diff --git a/ppocr/optimizer/regularizer.py b/ppocr/optimizer/regularizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ac1b81f60677ebb5c4d9ef034fc6f84d97fd5f8
--- /dev/null
+++ b/ppocr/optimizer/regularizer.py
@@ -0,0 +1,54 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from paddle import fluid
+
+
+class L1Decay(object):
+ """
+ L1 Weight Decay Regularization, which encourages the weights to be sparse.
+ Args:
+ factor(float): regularization coeff. Default:0.0.
+ """
+
+ def __init__(self, factor=0.0):
+ super(L1Decay, self).__init__()
+ self.regularization_coeff = factor
+
+ def __call__(self):
+ reg = fluid.regularizer.L1Decay(
+ regularization_coeff=self.regularization_coeff)
+ return reg
+
+
+class L2Decay(object):
+ """
+ L2 Weight Decay Regularization, which encourages the weights to stay small.
+ Args:
+ factor(float): regularization coeff. Default:0.0.
+ """
+
+ def __init__(self, factor=0.0):
+ super(L2Decay, self).__init__()
+ self.regularization_coeff = factor
+
+ def __call__(self):
+ reg = fluid.regularizer.L2Decay(
+ regularization_coeff=self.regularization_coeff)
+ return reg
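
Usage is analogous for the regularizer wrappers, e.g. (a sketch; the factor is illustrative):

from ppocr.optimizer.regularizer import L2Decay

reg = L2Decay(factor=1e-4)()  # builds fluid.regularizer.L2Decay(regularization_coeff=1e-4)
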
diff --git a/ppocr/postprocess/__init__.py b/ppocr/postprocess/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..15fd7d3dd85e1c3245313c31840b831d7c72fbd5
--- /dev/null
+++ b/ppocr/postprocess/__init__.py
@@ -0,0 +1,38 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import copy
+
+__all__ = ['build_post_process']
+
+
+def build_post_process(config, global_config=None):
+ from .db_postprocess import DBPostProcess
+
+ from .rec_postprocess import CTCLabelDecode, AttnLabelDecode
+ support_dict = ['DBPostProcess', 'CTCLabelDecode', 'AttnLabelDecode']
+
+ config = copy.deepcopy(config)
+ module_name = config.pop('name')
+ if global_config is not None:
+ config.update(global_config)
+ assert module_name in support_dict, \
+ 'post process only supports {}'.format(support_dict)
+ module_class = eval(module_name)(**config)
+ return module_class
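
A sketch of the config-driven construction (the keys mirror DBPostProcess's keyword arguments; the values are illustrative):

from ppocr.postprocess import build_post_process

config = {
    'name': 'DBPostProcess',   # selects the class from support_dict
    'thresh': 0.3,
    'box_thresh': 0.7,
    'max_candidates': 1000,
    'unclip_ratio': 2.0,
}
post_process = build_post_process(config)
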
diff --git a/ppocr/postprocess/db_postprocess.py b/ppocr/postprocess/db_postprocess.py
index f115f12ed177dda87d7caae2167e44dca037c9ae..f09acb2a6fc4384ad2dc73f0a1c3c31a5b537add 100644
--- a/ppocr/postprocess/db_postprocess.py
+++ b/ppocr/postprocess/db_postprocess.py
@@ -16,11 +16,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-import paddle
-import paddle.fluid as fluid
-
import numpy as np
-import string
import cv2
from shapely.geometry import Polygon
import pyclipper
@@ -31,11 +27,16 @@ class DBPostProcess(object):
The post process for Differentiable Binarization (DB).
"""
- def __init__(self, params):
- self.thresh = params['thresh']
- self.box_thresh = params['box_thresh']
- self.max_candidates = params['max_candidates']
- self.unclip_ratio = params['unclip_ratio']
+ def __init__(self,
+ thresh=0.3,
+ box_thresh=0.7,
+ max_candidates=1000,
+ unclip_ratio=2.0,
+ **kwargs):
+ self.thresh = thresh
+ self.box_thresh = box_thresh
+ self.max_candidates = max_candidates
+ self.unclip_ratio = unclip_ratio
self.min_size = 3
def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
@@ -55,9 +56,9 @@ class DBPostProcess(object):
contours, _ = outs[0], outs[1]
num_contours = min(len(contours), self.max_candidates)
- boxes = np.zeros((num_contours, 4, 2), dtype=np.int16)
- scores = np.zeros((num_contours, ), dtype=np.float32)
+ boxes = []
+ scores = []
for index in range(num_contours):
contour = contours[index]
points, sside = self.get_mini_boxes(contour)
@@ -73,17 +74,14 @@ class DBPostProcess(object):
if sside < self.min_size + 2:
continue
box = np.array(box)
- if not isinstance(dest_width, int):
- dest_width = dest_width.item()
- dest_height = dest_height.item()
box[:, 0] = np.clip(
np.round(box[:, 0] / width * dest_width), 0, dest_width)
box[:, 1] = np.clip(
np.round(box[:, 1] / height * dest_height), 0, dest_height)
- boxes[index, :, :] = box.astype(np.int16)
- scores[index] = score
- return boxes, scores
+ boxes.append(box.astype(np.int16))
+ scores.append(score)
+ return np.array(boxes, dtype=np.int16), scores
def unclip(self, box):
unclip_ratio = self.unclip_ratio
@@ -131,28 +129,15 @@ class DBPostProcess(object):
cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
- def __call__(self, outs_dict, ratio_list):
- pred = outs_dict['maps']
-
- pred = pred[:, 0, :, :]
+ def __call__(self, pred, shape_list):
+ pred = pred.numpy()[:, 0, :, :]
segmentation = pred > self.thresh
boxes_batch = []
for batch_index in range(pred.shape[0]):
- height, width = pred.shape[-2:]
- tmp_boxes, tmp_scores = self.boxes_from_bitmap(
+ height, width = shape_list[batch_index]
+ boxes, scores = self.boxes_from_bitmap(
pred[batch_index], segmentation[batch_index], width, height)
- boxes = []
- for k in range(len(tmp_boxes)):
- if tmp_scores[k] > self.box_thresh:
- boxes.append(tmp_boxes[k])
- if len(boxes) > 0:
- boxes = np.array(boxes)
-
- ratio_h, ratio_w = ratio_list[batch_index]
- boxes[:, :, 0] = boxes[:, :, 0] / ratio_w
- boxes[:, :, 1] = boxes[:, :, 1] / ratio_h
-
- boxes_batch.append(boxes)
+ boxes_batch.append({'points': boxes})
return boxes_batch
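
Under the new calling convention above, the post process consumes the raw network output and the original image sizes directly (pred and shape_list here are hypothetical placeholders):

# pred: paddle Tensor of shape [N, 1, H, W] from the DB head
# shape_list: per-image [h, w] of the original (pre-resize) images
boxes_batch = post_process(pred, shape_list)
for item in boxes_batch:
    print(item['points'].shape)  # (num_boxes, 4, 2), int16 pixel coords
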
diff --git a/ppocr/postprocess/db_postprocess_torch.py b/ppocr/postprocess/db_postprocess_torch.py
new file mode 100644
index 0000000000000000000000000000000000000000..83770df081bc467d71114be2a3d571cacf3d51cc
--- /dev/null
+++ b/ppocr/postprocess/db_postprocess_torch.py
@@ -0,0 +1,133 @@
+import cv2
+import numpy as np
+import pyclipper
+from shapely.geometry import Polygon
+
+
+class DBPostProcess():
+ def __init__(self,
+ thresh=0.3,
+ box_thresh=0.7,
+ max_candidates=1000,
+ unclip_ratio=1.5):
+ self.min_size = 3
+ self.thresh = thresh
+ self.box_thresh = box_thresh
+ self.max_candidates = max_candidates
+ self.unclip_ratio = unclip_ratio
+
+ def __call__(self, pred, shape_list, is_output_polygon=False):
+ '''
+ pred: binary text-region segmentation map, with shape (N, 1, H, W)
+ shape_list: array of [h, w] pairs, one per image in the batch
+ '''
+ pred = pred.numpy()[:, 0, :, :]
+ segmentation = self.binarize(pred)
+ batch_out = []
+ for batch_index in range(pred.shape[0]):
+ height, width = shape_list[batch_index]
+ boxes, scores = self.post_p(
+ pred[batch_index],
+ segmentation[batch_index],
+ width,
+ height,
+ is_output_polygon=is_output_polygon)
+ batch_out.append({"points": boxes})
+ return batch_out
+
+ def binarize(self, pred):
+ return pred > self.thresh
+
+ def post_p(self,
+ pred,
+ bitmap,
+ dest_width,
+ dest_height,
+ is_output_polygon=True):
+ '''
+ bitmap: single binarized map with shape (H, W),
+ whose values are in {0, 1}
+ '''
+ height, width = pred.shape
+ boxes = []
+ new_scores = []
+ contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8),
+ cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
+ for contour in contours[:self.max_candidates]:
+ epsilon = 0.005 * cv2.arcLength(contour, True)
+ approx = cv2.approxPolyDP(contour, epsilon, True)
+ points = approx.reshape((-1, 2))
+ if points.shape[0] < 4:
+ continue
+ score = self.box_score_fast(pred, points.reshape(-1, 2))
+ if self.box_thresh > score:
+ continue
+
+ if points.shape[0] > 2:
+ box = self.unclip(points, unclip_ratio=self.unclip_ratio)
+ if len(box) != 1:
+ continue
+ else:
+ continue
+ four_point_box, sside = self.get_mini_boxes(box.reshape((-1, 1, 2)))
+ if sside < self.min_size + 2:
+ continue
+
+ if not is_output_polygon:
+ box = np.array(four_point_box)
+ else:
+ box = box.reshape(-1, 2)
+ box[:, 0] = np.clip(
+ np.round(box[:, 0] / width * dest_width), 0, dest_width)
+ box[:, 1] = np.clip(
+ np.round(box[:, 1] / height * dest_height), 0, dest_height)
+ boxes.append(box)
+ new_scores.append(score)
+ return boxes, new_scores
+
+ def unclip(self, box, unclip_ratio=1.5):
+ poly = Polygon(box)
+ distance = poly.area * unclip_ratio / poly.length
+ offset = pyclipper.PyclipperOffset()
+ offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
+ expanded = np.array(offset.Execute(distance))
+ return expanded
+
+ def get_mini_boxes(self, contour):
+ bounding_box = cv2.minAreaRect(contour)
+ points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
+
+ index_1, index_2, index_3, index_4 = 0, 1, 2, 3
+ if points[1][1] > points[0][1]:
+ index_1 = 0
+ index_4 = 1
+ else:
+ index_1 = 1
+ index_4 = 0
+ if points[3][1] > points[2][1]:
+ index_2 = 2
+ index_3 = 3
+ else:
+ index_2 = 3
+ index_3 = 2
+
+ box = [
+ points[index_1], points[index_2], points[index_3], points[index_4]
+ ]
+ return box, min(bounding_box[1])
+
+ def box_score_fast(self, bitmap, _box):
+ h, w = bitmap.shape[:2]
+ box = _box.copy()
+ xmin = np.clip(np.floor(box[:, 0].min()).astype(int), 0, w - 1)
+ xmax = np.clip(np.ceil(box[:, 0].max()).astype(int), 0, w - 1)
+ ymin = np.clip(np.floor(box[:, 1].min()).astype(int), 0, h - 1)
+ ymax = np.clip(np.ceil(box[:, 1].max()).astype(int), 0, h - 1)
+
+ mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
+ box[:, 0] = box[:, 0] - xmin
+ box[:, 1] = box[:, 1] - ymin
+ cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
+ return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
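
As a quick check of the unclip distance used above (distance = area * unclip_ratio / perimeter): for a 100x20 axis-aligned box with unclip_ratio=1.5, the area is 2000 and the perimeter 240, so each edge is pushed outward by 12.5 pixels.

from shapely.geometry import Polygon

poly = Polygon([(0, 0), (100, 0), (100, 20), (0, 20)])
print(poly.area * 1.5 / poly.length)  # 12.5
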
diff --git a/ppocr/postprocess/east_postprocess.py b/ppocr/postprocess/east_postprocess.py
deleted file mode 100755
index 270cf6699bb7f77c730c6ff80b49f1798b9bb720..0000000000000000000000000000000000000000
--- a/ppocr/postprocess/east_postprocess.py
+++ /dev/null
@@ -1,136 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-from .locality_aware_nms import nms_locality
-import cv2
-
-import os
-import sys
-__dir__ = os.path.dirname(os.path.abspath(__file__))
-sys.path.append(__dir__)
-sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
-
-
-class EASTPostPocess(object):
- """
- The post process for EAST.
- """
-
- def __init__(self, params):
- self.score_thresh = params['score_thresh']
- self.cover_thresh = params['cover_thresh']
- self.nms_thresh = params['nms_thresh']
-
- # c++ la-nms is faster, but only support python 3.5
- self.is_python35 = False
- if sys.version_info.major == 3 and sys.version_info.minor == 5:
- self.is_python35 = True
-
- def restore_rectangle_quad(self, origin, geometry):
- """
- Restore rectangle from quadrangle.
- """
- # quad
- origin_concat = np.concatenate(
- (origin, origin, origin, origin), axis=1) # (n, 8)
- pred_quads = origin_concat - geometry
- pred_quads = pred_quads.reshape((-1, 4, 2)) # (n, 4, 2)
- return pred_quads
-
- def detect(self,
- score_map,
- geo_map,
- score_thresh=0.8,
- cover_thresh=0.1,
- nms_thresh=0.2):
- """
- restore text boxes from score map and geo map
- """
- score_map = score_map[0]
- geo_map = np.swapaxes(geo_map, 1, 0)
- geo_map = np.swapaxes(geo_map, 1, 2)
- # filter the score map
- xy_text = np.argwhere(score_map > score_thresh)
- if len(xy_text) == 0:
- return []
- # sort the text boxes via the y axis
- xy_text = xy_text[np.argsort(xy_text[:, 0])]
- #restore quad proposals
- text_box_restored = self.restore_rectangle_quad(
- xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])
- boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
- boxes[:, :8] = text_box_restored.reshape((-1, 8))
- boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
- if self.is_python35:
- import lanms
- boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh)
- else:
- boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
- if boxes.shape[0] == 0:
- return []
- # Here we filter some low score boxes by the average score map,
- # this is different from the orginal paper.
- for i, box in enumerate(boxes):
- mask = np.zeros_like(score_map, dtype=np.uint8)
- cv2.fillPoly(mask, box[:8].reshape(
- (-1, 4, 2)).astype(np.int32) // 4, 1)
- boxes[i, 8] = cv2.mean(score_map, mask)[0]
- boxes = boxes[boxes[:, 8] > cover_thresh]
- return boxes
-
- def sort_poly(self, p):
- """
- Sort polygons.
- """
- min_axis = np.argmin(np.sum(p, axis=1))
- p = p[[min_axis, (min_axis + 1) % 4,\
- (min_axis + 2) % 4, (min_axis + 3) % 4]]
- if abs(p[0, 0] - p[1, 0]) > abs(p[0, 1] - p[1, 1]):
- return p
- else:
- return p[[0, 3, 2, 1]]
-
- def __call__(self, outs_dict, ratio_list):
- score_list = outs_dict['f_score']
- geo_list = outs_dict['f_geo']
- img_num = len(ratio_list)
- dt_boxes_list = []
- for ino in range(img_num):
- score = score_list[ino]
- geo = geo_list[ino]
- boxes = self.detect(
- score_map=score,
- geo_map=geo,
- score_thresh=self.score_thresh,
- cover_thresh=self.cover_thresh,
- nms_thresh=self.nms_thresh)
- boxes_norm = []
- if len(boxes) > 0:
- ratio_h, ratio_w = ratio_list[ino]
- boxes = boxes[:, :8].reshape((-1, 4, 2))
- boxes[:, :, 0] /= ratio_w
- boxes[:, :, 1] /= ratio_h
- for i_box, box in enumerate(boxes):
- box = self.sort_poly(box.astype(np.int32))
- if np.linalg.norm(box[0] - box[1]) < 5 \
- or np.linalg.norm(box[3] - box[0]) < 5:
- continue
- boxes_norm.append(box)
- dt_boxes_list.append(np.array(boxes_norm))
- return dt_boxes_list
diff --git a/ppocr/postprocess/lanms/.gitignore b/ppocr/postprocess/lanms/.gitignore
deleted file mode 100644
index 6a57227eb94eaea81d9b80e378d1c7c61e74f62e..0000000000000000000000000000000000000000
--- a/ppocr/postprocess/lanms/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-adaptor.so
diff --git a/ppocr/postprocess/lanms/.ycm_extra_conf.py b/ppocr/postprocess/lanms/.ycm_extra_conf.py
deleted file mode 100644
index cd1a74e920bad8d84b755b5dbfbf83e6884836d6..0000000000000000000000000000000000000000
--- a/ppocr/postprocess/lanms/.ycm_extra_conf.py
+++ /dev/null
@@ -1,140 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright (C) 2014 Google Inc.
-#
-# This file is part of YouCompleteMe.
-#
-# YouCompleteMe is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# YouCompleteMe is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with YouCompleteMe. If not, see <http://www.gnu.org/licenses/>.
-
-import os
-import sys
-import glob
-import ycm_core
-
-# These are the compilation flags that will be used in case there's no
-# compilation database set (by default, one is not set).
-# CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR.
-sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-
-
-BASE_DIR = os.path.dirname(os.path.realpath(__file__))
-
-from plumbum.cmd import python_config
-
-
-flags = [
- '-Wall',
- '-Wextra',
- '-Wnon-virtual-dtor',
- '-Winvalid-pch',
- '-Wno-unused-local-typedefs',
- '-std=c++11',
- '-x', 'c++',
- '-Iinclude',
-] + python_config('--cflags').split()
-
-
-# Set this to the absolute path to the folder (NOT the file!) containing the
-# compile_commands.json file to use that instead of 'flags'. See here for
-# more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html
-#
-# Most projects will NOT need to set this to anything; you can just change the
-# 'flags' list of compilation flags.
-compilation_database_folder = ''
-
-if os.path.exists( compilation_database_folder ):
- database = ycm_core.CompilationDatabase( compilation_database_folder )
-else:
- database = None
-
-SOURCE_EXTENSIONS = [ '.cpp', '.cxx', '.cc', '.c', '.m', '.mm' ]
-
-def DirectoryOfThisScript():
- return os.path.dirname( os.path.abspath( __file__ ) )
-
-
-def MakeRelativePathsInFlagsAbsolute( flags, working_directory ):
- if not working_directory:
- return list( flags )
- new_flags = []
- make_next_absolute = False
- path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ]
- for flag in flags:
- new_flag = flag
-
- if make_next_absolute:
- make_next_absolute = False
- if not flag.startswith( '/' ):
- new_flag = os.path.join( working_directory, flag )
-
- for path_flag in path_flags:
- if flag == path_flag:
- make_next_absolute = True
- break
-
- if flag.startswith( path_flag ):
- path = flag[ len( path_flag ): ]
- new_flag = path_flag + os.path.join( working_directory, path )
- break
-
- if new_flag:
- new_flags.append( new_flag )
- return new_flags
-
-
-def IsHeaderFile( filename ):
- extension = os.path.splitext( filename )[ 1 ]
- return extension in [ '.h', '.hxx', '.hpp', '.hh' ]
-
-
-def GetCompilationInfoForFile( filename ):
- # The compilation_commands.json file generated by CMake does not have entries
- # for header files. So we do our best by asking the db for flags for a
- # corresponding source file, if any. If one exists, the flags for that file
- # should be good enough.
- if IsHeaderFile( filename ):
- basename = os.path.splitext( filename )[ 0 ]
- for extension in SOURCE_EXTENSIONS:
- replacement_file = basename + extension
- if os.path.exists( replacement_file ):
- compilation_info = database.GetCompilationInfoForFile(
- replacement_file )
- if compilation_info.compiler_flags_:
- return compilation_info
- return None
- return database.GetCompilationInfoForFile( filename )
-
-
-# This is the entry point; this function is called by ycmd to produce flags for
-# a file.
-def FlagsForFile( filename, **kwargs ):
- if database:
- # Bear in mind that compilation_info.compiler_flags_ does NOT return a
- # python list, but a "list-like" StringVec object
- compilation_info = GetCompilationInfoForFile( filename )
- if not compilation_info:
- return None
-
- final_flags = MakeRelativePathsInFlagsAbsolute(
- compilation_info.compiler_flags_,
- compilation_info.compiler_working_dir_ )
- else:
- relative_to = DirectoryOfThisScript()
- final_flags = MakeRelativePathsInFlagsAbsolute( flags, relative_to )
-
- return {
- 'flags': final_flags,
- 'do_cache': True
- }
-
diff --git a/ppocr/postprocess/lanms/Makefile b/ppocr/postprocess/lanms/Makefile
deleted file mode 100644
index 416871d151f1a86969e7026d7c2418f19a83c078..0000000000000000000000000000000000000000
--- a/ppocr/postprocess/lanms/Makefile
+++ /dev/null
@@ -1,13 +0,0 @@
-CXXFLAGS = -I include -std=c++11 -O3 $(shell python3-config --cflags)
-LDFLAGS = $(shell python3-config --ldflags)
-
-DEPS = lanms.h $(shell find include -xtype f)
-CXX_SOURCES = adaptor.cpp include/clipper/clipper.cpp
-
-LIB_SO = adaptor.so
-
-$(LIB_SO): $(CXX_SOURCES) $(DEPS)
- $(CXX) -o $@ $(CXXFLAGS) $(LDFLAGS) $(CXX_SOURCES) --shared -fPIC
-
-clean:
- rm -rf $(LIB_SO)
diff --git a/ppocr/postprocess/lanms/__init__.py b/ppocr/postprocess/lanms/__init__.py
deleted file mode 100644
index 649d6468bf3605e863a878638f2682d393578534..0000000000000000000000000000000000000000
--- a/ppocr/postprocess/lanms/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import subprocess
-import os
-import numpy as np
-
-BASE_DIR = os.path.dirname(os.path.realpath(__file__))
-
-if subprocess.call(['make', '-C', BASE_DIR]) != 0: # return value
- raise RuntimeError('Cannot compile lanms: {}'.format(BASE_DIR))
-
-
-def merge_quadrangle_n9(polys, thres=0.3, precision=10000):
- from .adaptor import merge_quadrangle_n9 as nms_impl
- if len(polys) == 0:
- return np.array([], dtype='float32')
- p = polys.copy()
- p[:,:8] *= precision
- ret = np.array(nms_impl(p, thres), dtype='float32')
- ret[:,:8] /= precision
- return ret
-
diff --git a/ppocr/postprocess/lanms/__main__.py b/ppocr/postprocess/lanms/__main__.py
deleted file mode 100644
index 72bba3603cc770964ec110bda57242d70b7fd5df..0000000000000000000000000000000000000000
--- a/ppocr/postprocess/lanms/__main__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-import numpy as np
-
-
-from . import merge_quadrangle_n9
-
-if __name__ == '__main__':
- # unit square with confidence 1
- q = np.array([0, 0, 0, 1, 1, 1, 1, 0, 1], dtype='float32')
-
- print(merge_quadrangle_n9(np.array([q, q + 0.1, q + 2])))
diff --git a/ppocr/postprocess/lanms/adaptor.cpp b/ppocr/postprocess/lanms/adaptor.cpp
deleted file mode 100644
index 7d38278f9451b67ab533abb0c3473240fada9417..0000000000000000000000000000000000000000
--- a/ppocr/postprocess/lanms/adaptor.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-#include "pybind11/pybind11.h"
-#include "pybind11/numpy.h"
-#include "pybind11/stl.h"
-#include "pybind11/stl_bind.h"
-
-#include "lanms.h"
-
-namespace py = pybind11;
-
-
-namespace lanms_adaptor {
-
- std::vector<std::vector<float>> polys2floats(const std::vector<lanms::Polygon> &polys) {
- std::vector> ret;
- for (size_t i = 0; i < polys.size(); i ++) {
- auto &p = polys[i];
- auto &poly = p.poly;
- ret.emplace_back(std::vector<float>{
- float(poly[0].X), float(poly[0].Y),
- float(poly[1].X), float(poly[1].Y),
- float(poly[2].X), float(poly[2].Y),
- float(poly[3].X), float(poly[3].Y),
- float(p.score),
- });
- }
-
- return ret;
- }
-
-
- /**
- *
- * \param quad_n9 an n-by-9 numpy array, where first 8 numbers denote the
- * quadrangle, and the last one is the score
- * \param iou_threshold two quadrangles with iou score above this threshold
- * will be merged
- *
- * \return an n-by-9 numpy array, the merged quadrangles
- */
- std::vector<std::vector<float>> merge_quadrangle_n9(
- py::array_t<float, py::array::c_style | py::array::forcecast> quad_n9,
- float iou_threshold) {
- auto pbuf = quad_n9.request();
- if (pbuf.ndim != 2 || pbuf.shape[1] != 9)
- throw std::runtime_error("quadrangles must have a shape of (n, 9)");
- auto n = pbuf.shape[0];
- auto ptr = static_cast<float *>(pbuf.ptr);
- return polys2floats(lanms::merge_quadrangle_n9(ptr, n, iou_threshold));
- }
-
-}
-
-PYBIND11_PLUGIN(adaptor) {
- py::module m("adaptor", "NMS");
-
- m.def("merge_quadrangle_n9", &lanms_adaptor::merge_quadrangle_n9,
- "merge quadrangels");
-
- return m.ptr();
-}
-
diff --git a/ppocr/postprocess/lanms/include/clipper/clipper.cpp b/ppocr/postprocess/lanms/include/clipper/clipper.cpp
deleted file mode 100644
index 09657560fe34fc66ddc5a98712e9adaedfbecd0a..0000000000000000000000000000000000000000
--- a/ppocr/postprocess/lanms/include/clipper/clipper.cpp
+++ /dev/null
@@ -1,4622 +0,0 @@
-/*******************************************************************************
-* *
-* Author : Angus Johnson *
-* Version : 6.4.0 *
-* Date : 2 July 2015 *
-* Website : http://www.angusj.com *
-* Copyright : Angus Johnson 2010-2015 *
-* *
-* License: *
-* Use, modification & distribution is subject to Boost Software License Ver 1. *
-* http://www.boost.org/LICENSE_1_0.txt *
-* *
-* Attributions: *
-* The code in this library is an extension of Bala Vatti's clipping algorithm: *
-* "A generic solution to polygon clipping" *
-* Communications of the ACM, Vol 35, Issue 7 (July 1992) pp 56-63. *
-* http://portal.acm.org/citation.cfm?id=129906 *
-* *
-* Computer graphics and geometric modeling: implementation and algorithms *
-* By Max K. Agoston *
-* Springer; 1 edition (January 4, 2005) *
-* http://books.google.com/books?q=vatti+clipping+agoston *
-* *
-* See also: *
-* "Polygon Offsetting by Computing Winding Numbers" *
-* Paper no. DETC2005-85513 pp. 565-575 *
-* ASME 2005 International Design Engineering Technical Conferences *
-* and Computers and Information in Engineering Conference (IDETC/CIE2005) *
-* September 24-28, 2005 , Long Beach, California, USA *
-* http://www.me.berkeley.edu/~mcmains/pubs/DAC05OffsetPolygon.pdf *
-* *
-*******************************************************************************/
-
-/*******************************************************************************
-* *
-* This is a translation of the Delphi Clipper library and the naming style *
-* used has retained a Delphi flavour. *
-* *
-*******************************************************************************/
-
-#include "clipper.hpp"
-#include <cmath>
-#include <vector>
-#include <algorithm>
-#include <stdexcept>
-#include <cstring>
-#include <cstdlib>
-#include <ostream>
-#include <functional>
-
-namespace ClipperLib {
-
-static double const pi = 3.141592653589793238;
-static double const two_pi = pi *2;
-static double const def_arc_tolerance = 0.25;
-
-enum Direction { dRightToLeft, dLeftToRight };
-
-static int const Unassigned = -1; //edge not currently 'owning' a solution
-static int const Skip = -2; //edge that would otherwise close a path
-
-#define HORIZONTAL (-1.0E+40)
-#define TOLERANCE (1.0e-20)
-#define NEAR_ZERO(val) (((val) > -TOLERANCE) && ((val) < TOLERANCE))
-
-struct TEdge {
- IntPoint Bot;
- IntPoint Curr; //current (updated for every new scanbeam)
- IntPoint Top;
- double Dx;
- PolyType PolyTyp;
- EdgeSide Side; //side only refers to current side of solution poly
- int WindDelta; //1 or -1 depending on winding direction
- int WindCnt;
- int WindCnt2; //winding count of the opposite polytype
- int OutIdx;
- TEdge *Next;
- TEdge *Prev;
- TEdge *NextInLML;
- TEdge *NextInAEL;
- TEdge *PrevInAEL;
- TEdge *NextInSEL;
- TEdge *PrevInSEL;
-};
-
-struct IntersectNode {
- TEdge *Edge1;
- TEdge *Edge2;
- IntPoint Pt;
-};
-
-struct LocalMinimum {
- cInt Y;
- TEdge *LeftBound;
- TEdge *RightBound;
-};
-
-struct OutPt;
-
-//OutRec: contains a path in the clipping solution. Edges in the AEL will
-//carry a pointer to an OutRec when they are part of the clipping solution.
-struct OutRec {
- int Idx;
- bool IsHole;
- bool IsOpen;
- OutRec *FirstLeft; //see comments in clipper.pas
- PolyNode *PolyNd;
- OutPt *Pts;
- OutPt *BottomPt;
-};
-
-struct OutPt {
- int Idx;
- IntPoint Pt;
- OutPt *Next;
- OutPt *Prev;
-};
-
-struct Join {
- OutPt *OutPt1;
- OutPt *OutPt2;
- IntPoint OffPt;
-};
-
-struct LocMinSorter
-{
- inline bool operator()(const LocalMinimum& locMin1, const LocalMinimum& locMin2)
- {
- return locMin2.Y < locMin1.Y;
- }
-};
-
-//------------------------------------------------------------------------------
-//------------------------------------------------------------------------------
-
-inline cInt Round(double val)
-{
- if ((val < 0)) return static_cast<cInt>(val - 0.5);
- else return static_cast<cInt>(val + 0.5);
-}
-//------------------------------------------------------------------------------
-
-inline cInt Abs(cInt val)
-{
- return val < 0 ? -val : val;
-}
-
-//------------------------------------------------------------------------------
-// PolyTree methods ...
-//------------------------------------------------------------------------------
-
-void PolyTree::Clear()
-{
- for (PolyNodes::size_type i = 0; i < AllNodes.size(); ++i)
- delete AllNodes[i];
- AllNodes.resize(0);
- Childs.resize(0);
-}
-//------------------------------------------------------------------------------
-
-PolyNode* PolyTree::GetFirst() const
-{
- if (!Childs.empty())
- return Childs[0];
- else
- return 0;
-}
-//------------------------------------------------------------------------------
-
-int PolyTree::Total() const
-{
- int result = (int)AllNodes.size();
- //with negative offsets, ignore the hidden outer polygon ...
- if (result > 0 && Childs[0] != AllNodes[0]) result--;
- return result;
-}
-
-//------------------------------------------------------------------------------
-// PolyNode methods ...
-//------------------------------------------------------------------------------
-
-PolyNode::PolyNode(): Childs(), Parent(0), Index(0), m_IsOpen(false)
-{
-}
-//------------------------------------------------------------------------------
-
-int PolyNode::ChildCount() const
-{
- return (int)Childs.size();
-}
-//------------------------------------------------------------------------------
-
-void PolyNode::AddChild(PolyNode& child)
-{
- unsigned cnt = (unsigned)Childs.size();
- Childs.push_back(&child);
- child.Parent = this;
- child.Index = cnt;
-}
-//------------------------------------------------------------------------------
-
-PolyNode* PolyNode::GetNext() const
-{
- if (!Childs.empty())
- return Childs[0];
- else
- return GetNextSiblingUp();
-}
-//------------------------------------------------------------------------------
-
-PolyNode* PolyNode::GetNextSiblingUp() const
-{
- if (!Parent) //protects against PolyTree.GetNextSiblingUp()
- return 0;
- else if (Index == Parent->Childs.size() - 1)
- return Parent->GetNextSiblingUp();
- else
- return Parent->Childs[Index + 1];
-}
-//------------------------------------------------------------------------------
-
-bool PolyNode::IsHole() const
-{
- bool result = true;
- PolyNode* node = Parent;
- while (node)
- {
- result = !result;
- node = node->Parent;
- }
- return result;
-}
-//------------------------------------------------------------------------------
-
-bool PolyNode::IsOpen() const
-{
- return m_IsOpen;
-}
-//------------------------------------------------------------------------------
-
-#ifndef use_int32
-
-//------------------------------------------------------------------------------
-// Int128 class (enables safe math on signed 64bit integers)
-// eg Int128 val1((long64)9223372036854775807); //ie 2^63 -1
-// Int128 val2((long64)9223372036854775807);
-// Int128 val3 = val1 * val2;
-// val3.AsString => "85070591730234615847396907784232501249" (8.5e+37)
-//------------------------------------------------------------------------------
-
-class Int128
-{
- public:
- ulong64 lo;
- long64 hi;
-
- Int128(long64 _lo = 0)
- {
- lo = (ulong64)_lo;
- if (_lo < 0) hi = -1; else hi = 0;
- }
-
-
- Int128(const Int128 &val): lo(val.lo), hi(val.hi){}
-
- Int128(const long64& _hi, const ulong64& _lo): lo(_lo), hi(_hi){}
-
- Int128& operator = (const long64 &val)
- {
- lo = (ulong64)val;
- if (val < 0) hi = -1; else hi = 0;
- return *this;
- }
-
- bool operator == (const Int128 &val) const
- {return (hi == val.hi && lo == val.lo);}
-
- bool operator != (const Int128 &val) const
- { return !(*this == val);}
-
- bool operator > (const Int128 &val) const
- {
- if (hi != val.hi)
- return hi > val.hi;
- else
- return lo > val.lo;
- }
-
- bool operator < (const Int128 &val) const
- {
- if (hi != val.hi)
- return hi < val.hi;
- else
- return lo < val.lo;
- }
-
- bool operator >= (const Int128 &val) const
- { return !(*this < val);}
-
- bool operator <= (const Int128 &val) const
- { return !(*this > val);}
-
- Int128& operator += (const Int128 &rhs)
- {
- hi += rhs.hi;
- lo += rhs.lo;
- if (lo < rhs.lo) hi++;
- return *this;
- }
-
- Int128 operator + (const Int128 &rhs) const
- {
- Int128 result(*this);
- result+= rhs;
- return result;
- }
-
- Int128& operator -= (const Int128 &rhs)
- {
- *this += -rhs;
- return *this;
- }
-
- Int128 operator - (const Int128 &rhs) const
- {
- Int128 result(*this);
- result -= rhs;
- return result;
- }
-
- Int128 operator-() const //unary negation
- {
- if (lo == 0)
- return Int128(-hi, 0);
- else
- return Int128(~hi, ~lo + 1);
- }
-
- operator double() const
- {
- const double shift64 = 18446744073709551616.0; //2^64
- if (hi < 0)
- {
- if (lo == 0) return (double)hi * shift64;
- else return -(double)(~lo + ~hi * shift64);
- }
- else
- return (double)(lo + hi * shift64);
- }
-
-};
-//------------------------------------------------------------------------------
-
-Int128 Int128Mul (long64 lhs, long64 rhs)
-{
- bool negate = (lhs < 0) != (rhs < 0);
-
- if (lhs < 0) lhs = -lhs;
- ulong64 int1Hi = ulong64(lhs) >> 32;
- ulong64 int1Lo = ulong64(lhs & 0xFFFFFFFF);
-
- if (rhs < 0) rhs = -rhs;
- ulong64 int2Hi = ulong64(rhs) >> 32;
- ulong64 int2Lo = ulong64(rhs & 0xFFFFFFFF);
-
- //nb: see comments in clipper.pas
- ulong64 a = int1Hi * int2Hi;
- ulong64 b = int1Lo * int2Lo;
- ulong64 c = int1Hi * int2Lo + int1Lo * int2Hi;
-
- Int128 tmp;
- tmp.hi = long64(a + (c >> 32));
- tmp.lo = long64(c << 32);
- tmp.lo += long64(b);
- if (tmp.lo < b) tmp.hi++;
- if (negate) tmp = -tmp;
- return tmp;
-};
-#endif
-
-//------------------------------------------------------------------------------
-// Miscellaneous global functions
-//------------------------------------------------------------------------------
-
-bool Orientation(const Path &poly)
-{
- return Area(poly) >= 0;
-}
-//------------------------------------------------------------------------------
-
-double Area(const Path &poly)
-{
- int size = (int)poly.size();
- if (size < 3) return 0;
-
- double a = 0;
- for (int i = 0, j = size -1; i < size; ++i)
- {
- a += ((double)poly[j].X + poly[i].X) * ((double)poly[j].Y - poly[i].Y);
- j = i;
- }
- return -a * 0.5;
-}
-//------------------------------------------------------------------------------
-
-double Area(const OutPt *op)
-{
- const OutPt *startOp = op;
- if (!op) return 0;
- double a = 0;
- do {
- a += (double)(op->Prev->Pt.X + op->Pt.X) * (double)(op->Prev->Pt.Y - op->Pt.Y);
- op = op->Next;
- } while (op != startOp);
- return a * 0.5;
-}
-//------------------------------------------------------------------------------
-
-double Area(const OutRec &outRec)
-{
- return Area(outRec.Pts);
-}
-//------------------------------------------------------------------------------
-
-bool PointIsVertex(const IntPoint &Pt, OutPt *pp)
-{
- OutPt *pp2 = pp;
- do
- {
- if (pp2->Pt == Pt) return true;
- pp2 = pp2->Next;
- }
- while (pp2 != pp);
- return false;
-}
-//------------------------------------------------------------------------------
-
-//See "The Point in Polygon Problem for Arbitrary Polygons" by Hormann & Agathos
-//http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.88.5498&rep=rep1&type=pdf
-int PointInPolygon(const IntPoint &pt, const Path &path)
-{
- //returns 0 if false, +1 if true, -1 if pt ON polygon boundary
- int result = 0;
- size_t cnt = path.size();
- if (cnt < 3) return 0;
- IntPoint ip = path[0];
- for(size_t i = 1; i <= cnt; ++i)
- {
- IntPoint ipNext = (i == cnt ? path[0] : path[i]);
- if (ipNext.Y == pt.Y)
- {
- if ((ipNext.X == pt.X) || (ip.Y == pt.Y &&
- ((ipNext.X > pt.X) == (ip.X < pt.X)))) return -1;
- }
- if ((ip.Y < pt.Y) != (ipNext.Y < pt.Y))
- {
- if (ip.X >= pt.X)
- {
- if (ipNext.X > pt.X) result = 1 - result;
- else
- {
- double d = (double)(ip.X - pt.X) * (ipNext.Y - pt.Y) -
- (double)(ipNext.X - pt.X) * (ip.Y - pt.Y);
- if (!d) return -1;
- if ((d > 0) == (ipNext.Y > ip.Y)) result = 1 - result;
- }
- } else
- {
- if (ipNext.X > pt.X)
- {
- double d = (double)(ip.X - pt.X) * (ipNext.Y - pt.Y) -
- (double)(ipNext.X - pt.X) * (ip.Y - pt.Y);
- if (!d) return -1;
- if ((d > 0) == (ipNext.Y > ip.Y)) result = 1 - result;
- }
- }
- }
- ip = ipNext;
- }
- return result;
-}
-//------------------------------------------------------------------------------
-
-int PointInPolygon (const IntPoint &pt, OutPt *op)
-{
- //returns 0 if false, +1 if true, -1 if pt ON polygon boundary
- int result = 0;
- OutPt* startOp = op;
- for(;;)
- {
- if (op->Next->Pt.Y == pt.Y)
- {
- if ((op->Next->Pt.X == pt.X) || (op->Pt.Y == pt.Y &&
- ((op->Next->Pt.X > pt.X) == (op->Pt.X < pt.X)))) return -1;
- }
- if ((op->Pt.Y < pt.Y) != (op->Next->Pt.Y < pt.Y))
- {
- if (op->Pt.X >= pt.X)
- {
- if (op->Next->Pt.X > pt.X) result = 1 - result;
- else
- {
- double d = (double)(op->Pt.X - pt.X) * (op->Next->Pt.Y - pt.Y) -
- (double)(op->Next->Pt.X - pt.X) * (op->Pt.Y - pt.Y);
- if (!d) return -1;
- if ((d > 0) == (op->Next->Pt.Y > op->Pt.Y)) result = 1 - result;
- }
- } else
- {
- if (op->Next->Pt.X > pt.X)
- {
- double d = (double)(op->Pt.X - pt.X) * (op->Next->Pt.Y - pt.Y) -
- (double)(op->Next->Pt.X - pt.X) * (op->Pt.Y - pt.Y);
- if (!d) return -1;
- if ((d > 0) == (op->Next->Pt.Y > op->Pt.Y)) result = 1 - result;
- }
- }
- }
- op = op->Next;
- if (startOp == op) break;
- }
- return result;
-}
-//------------------------------------------------------------------------------
-
-bool Poly2ContainsPoly1(OutPt *OutPt1, OutPt *OutPt2)
-{
- OutPt* op = OutPt1;
- do
- {
- //nb: PointInPolygon returns 0 if false, +1 if true, -1 if pt on polygon
- int res = PointInPolygon(op->Pt, OutPt2);
- if (res >= 0) return res > 0;
- op = op->Next;
- }
- while (op != OutPt1);
- return true;
-}
-//----------------------------------------------------------------------
-
-bool SlopesEqual(const TEdge &e1, const TEdge &e2, bool UseFullInt64Range)
-{
-#ifndef use_int32
- if (UseFullInt64Range)
- return Int128Mul(e1.Top.Y - e1.Bot.Y, e2.Top.X - e2.Bot.X) ==
- Int128Mul(e1.Top.X - e1.Bot.X, e2.Top.Y - e2.Bot.Y);
- else
-#endif
- return (e1.Top.Y - e1.Bot.Y) * (e2.Top.X - e2.Bot.X) ==
- (e1.Top.X - e1.Bot.X) * (e2.Top.Y - e2.Bot.Y);
-}
-//------------------------------------------------------------------------------
-
-bool SlopesEqual(const IntPoint pt1, const IntPoint pt2,
- const IntPoint pt3, bool UseFullInt64Range)
-{
-#ifndef use_int32
- if (UseFullInt64Range)
- return Int128Mul(pt1.Y-pt2.Y, pt2.X-pt3.X) == Int128Mul(pt1.X-pt2.X, pt2.Y-pt3.Y);
- else
-#endif
- return (pt1.Y-pt2.Y)*(pt2.X-pt3.X) == (pt1.X-pt2.X)*(pt2.Y-pt3.Y);
-}
-//------------------------------------------------------------------------------
-
-bool SlopesEqual(const IntPoint pt1, const IntPoint pt2,
- const IntPoint pt3, const IntPoint pt4, bool UseFullInt64Range)
-{
-#ifndef use_int32
- if (UseFullInt64Range)
- return Int128Mul(pt1.Y-pt2.Y, pt3.X-pt4.X) == Int128Mul(pt1.X-pt2.X, pt3.Y-pt4.Y);
- else
-#endif
- return (pt1.Y-pt2.Y)*(pt3.X-pt4.X) == (pt1.X-pt2.X)*(pt3.Y-pt4.Y);
-}
-//------------------------------------------------------------------------------
-
-inline bool IsHorizontal(TEdge &e)
-{
- return e.Dx == HORIZONTAL;
-}
-//------------------------------------------------------------------------------
-
-inline double GetDx(const IntPoint pt1, const IntPoint pt2)
-{
- return (pt1.Y == pt2.Y) ?
- HORIZONTAL : (double)(pt2.X - pt1.X) / (pt2.Y - pt1.Y);
-}
-//---------------------------------------------------------------------------
-
-inline void SetDx(TEdge &e)
-{
- cInt dy = (e.Top.Y - e.Bot.Y);
- if (dy == 0) e.Dx = HORIZONTAL;
- else e.Dx = (double)(e.Top.X - e.Bot.X) / dy;
-}
-//---------------------------------------------------------------------------
-
-inline void SwapSides(TEdge &Edge1, TEdge &Edge2)
-{
- EdgeSide Side = Edge1.Side;
- Edge1.Side = Edge2.Side;
- Edge2.Side = Side;
-}
-//------------------------------------------------------------------------------
-
-inline void SwapPolyIndexes(TEdge &Edge1, TEdge &Edge2)
-{
- int OutIdx = Edge1.OutIdx;
- Edge1.OutIdx = Edge2.OutIdx;
- Edge2.OutIdx = OutIdx;
-}
-//------------------------------------------------------------------------------
-
-inline cInt TopX(TEdge &edge, const cInt currentY)
-{
- return ( currentY == edge.Top.Y ) ?
- edge.Top.X : edge.Bot.X + Round(edge.Dx *(currentY - edge.Bot.Y));
-}
-//------------------------------------------------------------------------------
-
-void IntersectPoint(TEdge &Edge1, TEdge &Edge2, IntPoint &ip)
-{
-#ifdef use_xyz
- ip.Z = 0;
-#endif
-
- double b1, b2;
- if (Edge1.Dx == Edge2.Dx)
- {
- ip.Y = Edge1.Curr.Y;
- ip.X = TopX(Edge1, ip.Y);
- return;
- }
- else if (Edge1.Dx == 0)
- {
- ip.X = Edge1.Bot.X;
- if (IsHorizontal(Edge2))
- ip.Y = Edge2.Bot.Y;
- else
- {
- b2 = Edge2.Bot.Y - (Edge2.Bot.X / Edge2.Dx);
- ip.Y = Round(ip.X / Edge2.Dx + b2);
- }
- }
- else if (Edge2.Dx == 0)
- {
- ip.X = Edge2.Bot.X;
- if (IsHorizontal(Edge1))
- ip.Y = Edge1.Bot.Y;
- else
- {
- b1 = Edge1.Bot.Y - (Edge1.Bot.X / Edge1.Dx);
- ip.Y = Round(ip.X / Edge1.Dx + b1);
- }
- }
- else
- {
- b1 = Edge1.Bot.X - Edge1.Bot.Y * Edge1.Dx;
- b2 = Edge2.Bot.X - Edge2.Bot.Y * Edge2.Dx;
- double q = (b2-b1) / (Edge1.Dx - Edge2.Dx);
- ip.Y = Round(q);
- if (std::fabs(Edge1.Dx) < std::fabs(Edge2.Dx))
- ip.X = Round(Edge1.Dx * q + b1);
- else
- ip.X = Round(Edge2.Dx * q + b2);
- }
-
- if (ip.Y < Edge1.Top.Y || ip.Y < Edge2.Top.Y)
- {
- if (Edge1.Top.Y > Edge2.Top.Y)
- ip.Y = Edge1.Top.Y;
- else
- ip.Y = Edge2.Top.Y;
- if (std::fabs(Edge1.Dx) < std::fabs(Edge2.Dx))
- ip.X = TopX(Edge1, ip.Y);
- else
- ip.X = TopX(Edge2, ip.Y);
- }
- //finally, don't allow 'ip' to be BELOW curr.Y (ie bottom of scanbeam) ...
- if (ip.Y > Edge1.Curr.Y)
- {
- ip.Y = Edge1.Curr.Y;
- //use the more vertical edge to derive X ...
- if (std::fabs(Edge1.Dx) > std::fabs(Edge2.Dx))
- ip.X = TopX(Edge2, ip.Y); else
- ip.X = TopX(Edge1, ip.Y);
- }
-}
-//------------------------------------------------------------------------------
-
-void ReversePolyPtLinks(OutPt *pp)
-{
- if (!pp) return;
- OutPt *pp1, *pp2;
- pp1 = pp;
- do {
- pp2 = pp1->Next;
- pp1->Next = pp1->Prev;
- pp1->Prev = pp2;
- pp1 = pp2;
- } while( pp1 != pp );
-}
-//------------------------------------------------------------------------------
-
-void DisposeOutPts(OutPt*& pp)
-{
- if (pp == 0) return;
- pp->Prev->Next = 0;
- while( pp )
- {
- OutPt *tmpPp = pp;
- pp = pp->Next;
- delete tmpPp;
- }
-}
-//------------------------------------------------------------------------------
-
-inline void InitEdge(TEdge* e, TEdge* eNext, TEdge* ePrev, const IntPoint& Pt)
-{
- std::memset(e, 0, sizeof(TEdge));
- e->Next = eNext;
- e->Prev = ePrev;
- e->Curr = Pt;
- e->OutIdx = Unassigned;
-}
-//------------------------------------------------------------------------------
-
-void InitEdge2(TEdge& e, PolyType Pt)
-{
- if (e.Curr.Y >= e.Next->Curr.Y)
- {
- e.Bot = e.Curr;
- e.Top = e.Next->Curr;
- } else
- {
- e.Top = e.Curr;
- e.Bot = e.Next->Curr;
- }
- SetDx(e);
- e.PolyTyp = Pt;
-}
-//------------------------------------------------------------------------------
-
-TEdge* RemoveEdge(TEdge* e)
-{
- //removes e from double_linked_list (but without removing from memory)
- e->Prev->Next = e->Next;
- e->Next->Prev = e->Prev;
- TEdge* result = e->Next;
- e->Prev = 0; //flag as removed (see ClipperBase.Clear)
- return result;
-}
-//------------------------------------------------------------------------------
-
-inline void ReverseHorizontal(TEdge &e)
-{
- //swap horizontal edges' Top and Bottom x's so they follow the natural
- //progression of the bounds - ie so their xbots will align with the
- //adjoining lower edge. [Helpful in the ProcessHorizontal() method.]
- std::swap(e.Top.X, e.Bot.X);
-#ifdef use_xyz
- std::swap(e.Top.Z, e.Bot.Z);
-#endif
-}
-//------------------------------------------------------------------------------
-
-void SwapPoints(IntPoint &pt1, IntPoint &pt2)
-{
- IntPoint tmp = pt1;
- pt1 = pt2;
- pt2 = tmp;
-}
-//------------------------------------------------------------------------------
-
-bool GetOverlapSegment(IntPoint pt1a, IntPoint pt1b, IntPoint pt2a,
- IntPoint pt2b, IntPoint &pt1, IntPoint &pt2)
-{
- //precondition: segments are Collinear.
- if (Abs(pt1a.X - pt1b.X) > Abs(pt1a.Y - pt1b.Y))
- {
- if (pt1a.X > pt1b.X) SwapPoints(pt1a, pt1b);
- if (pt2a.X > pt2b.X) SwapPoints(pt2a, pt2b);
- if (pt1a.X > pt2a.X) pt1 = pt1a; else pt1 = pt2a;
- if (pt1b.X < pt2b.X) pt2 = pt1b; else pt2 = pt2b;
- return pt1.X < pt2.X;
- } else
- {
- if (pt1a.Y < pt1b.Y) SwapPoints(pt1a, pt1b);
- if (pt2a.Y < pt2b.Y) SwapPoints(pt2a, pt2b);
- if (pt1a.Y < pt2a.Y) pt1 = pt1a; else pt1 = pt2a;
- if (pt1b.Y > pt2b.Y) pt2 = pt1b; else pt2 = pt2b;
- return pt1.Y > pt2.Y;
- }
-}
-//------------------------------------------------------------------------------
-
-bool FirstIsBottomPt(const OutPt* btmPt1, const OutPt* btmPt2)
-{
- OutPt *p = btmPt1->Prev;
- while ((p->Pt == btmPt1->Pt) && (p != btmPt1)) p = p->Prev;
- double dx1p = std::fabs(GetDx(btmPt1->Pt, p->Pt));
- p = btmPt1->Next;
- while ((p->Pt == btmPt1->Pt) && (p != btmPt1)) p = p->Next;
- double dx1n = std::fabs(GetDx(btmPt1->Pt, p->Pt));
-
- p = btmPt2->Prev;
- while ((p->Pt == btmPt2->Pt) && (p != btmPt2)) p = p->Prev;
- double dx2p = std::fabs(GetDx(btmPt2->Pt, p->Pt));
- p = btmPt2->Next;
- while ((p->Pt == btmPt2->Pt) && (p != btmPt2)) p = p->Next;
- double dx2n = std::fabs(GetDx(btmPt2->Pt, p->Pt));
-
- if (std::max(dx1p, dx1n) == std::max(dx2p, dx2n) &&
- std::min(dx1p, dx1n) == std::min(dx2p, dx2n))
- return Area(btmPt1) > 0; //if otherwise identical use orientation
- else
- return (dx1p >= dx2p && dx1p >= dx2n) || (dx1n >= dx2p && dx1n >= dx2n);
-}
-//------------------------------------------------------------------------------
-
-OutPt* GetBottomPt(OutPt *pp)
-{
- OutPt* dups = 0;
- OutPt* p = pp->Next;
- while (p != pp)
- {
- if (p->Pt.Y > pp->Pt.Y)
- {
- pp = p;
- dups = 0;
- }
- else if (p->Pt.Y == pp->Pt.Y && p->Pt.X <= pp->Pt.X)
- {
- if (p->Pt.X < pp->Pt.X)
- {
- dups = 0;
- pp = p;
- } else
- {
- if (p->Next != pp && p->Prev != pp) dups = p;
- }
- }
- p = p->Next;
- }
- if (dups)
- {
- //there appears to be at least 2 vertices at BottomPt so ...
- while (dups != p)
- {
- if (!FirstIsBottomPt(p, dups)) pp = dups;
- dups = dups->Next;
- while (dups->Pt != pp->Pt) dups = dups->Next;
- }
- }
- return pp;
-}
-//------------------------------------------------------------------------------
-
-bool Pt2IsBetweenPt1AndPt3(const IntPoint pt1,
- const IntPoint pt2, const IntPoint pt3)
-{
- if ((pt1 == pt3) || (pt1 == pt2) || (pt3 == pt2))
- return false;
- else if (pt1.X != pt3.X)
- return (pt2.X > pt1.X) == (pt2.X < pt3.X);
- else
- return (pt2.Y > pt1.Y) == (pt2.Y < pt3.Y);
-}
-//------------------------------------------------------------------------------
-
-bool HorzSegmentsOverlap(cInt seg1a, cInt seg1b, cInt seg2a, cInt seg2b)
-{
- if (seg1a > seg1b) std::swap(seg1a, seg1b);
- if (seg2a > seg2b) std::swap(seg2a, seg2b);
- return (seg1a < seg2b) && (seg2a < seg1b);
-}
-
-//------------------------------------------------------------------------------
-// ClipperBase class methods ...
-//------------------------------------------------------------------------------
-
-ClipperBase::ClipperBase() //constructor
-{
- m_CurrentLM = m_MinimaList.begin(); //begin() == end() here
- m_UseFullRange = false;
-}
-//------------------------------------------------------------------------------
-
-ClipperBase::~ClipperBase() //destructor
-{
- Clear();
-}
-//------------------------------------------------------------------------------
-
-void RangeTest(const IntPoint& Pt, bool& useFullRange)
-{
- if (useFullRange)
- {
- if (Pt.X > hiRange || Pt.Y > hiRange || -Pt.X > hiRange || -Pt.Y > hiRange)
- throw clipperException("Coordinate outside allowed range");
- }
- else if (Pt.X > loRange|| Pt.Y > loRange || -Pt.X > loRange || -Pt.Y > loRange)
- {
- useFullRange = true;
- RangeTest(Pt, useFullRange);
- }
-}
-//------------------------------------------------------------------------------
-
-TEdge* FindNextLocMin(TEdge* E)
-{
- for (;;)
- {
- while (E->Bot != E->Prev->Bot || E->Curr == E->Top) E = E->Next;
- if (!IsHorizontal(*E) && !IsHorizontal(*E->Prev)) break;
- while (IsHorizontal(*E->Prev)) E = E->Prev;
- TEdge* E2 = E;
- while (IsHorizontal(*E)) E = E->Next;
- if (E->Top.Y == E->Prev->Bot.Y) continue; //ie just an intermediate horz.
- if (E2->Prev->Bot.X < E->Bot.X) E = E2;
- break;
- }
- return E;
-}
-//------------------------------------------------------------------------------
-
-TEdge* ClipperBase::ProcessBound(TEdge* E, bool NextIsForward)
-{
- TEdge *Result = E;
- TEdge *Horz = 0;
-
- if (E->OutIdx == Skip)
- {
- //if edges still remain in the current bound beyond the skip edge then
- //create another LocMin and call ProcessBound once more
- if (NextIsForward)
- {
- while (E->Top.Y == E->Next->Bot.Y) E = E->Next;
- //don't include top horizontals when parsing a bound a second time,
- //they will be contained in the opposite bound ...
- while (E != Result && IsHorizontal(*E)) E = E->Prev;
- }
- else
- {
- while (E->Top.Y == E->Prev->Bot.Y) E = E->Prev;
- while (E != Result && IsHorizontal(*E)) E = E->Next;
- }
-
- if (E == Result)
- {
- if (NextIsForward) Result = E->Next;
- else Result = E->Prev;
- }
- else
- {
- //there are more edges in the bound beyond result starting with E
- if (NextIsForward)
- E = Result->Next;
- else
- E = Result->Prev;
- MinimaList::value_type locMin;
- locMin.Y = E->Bot.Y;
- locMin.LeftBound = 0;
- locMin.RightBound = E;
- E->WindDelta = 0;
- Result = ProcessBound(E, NextIsForward);
- m_MinimaList.push_back(locMin);
- }
- return Result;
- }
-
- TEdge *EStart;
-
- if (IsHorizontal(*E))
- {
- //We need to be careful with open paths because this may not be a
- //true local minima (ie E may be following a skip edge).
- //Also, consecutive horz. edges may start heading left before going right.
- if (NextIsForward)
- EStart = E->Prev;
- else
- EStart = E->Next;
- if (IsHorizontal(*EStart)) //ie an adjoining horizontal skip edge
- {
- if (EStart->Bot.X != E->Bot.X && EStart->Top.X != E->Bot.X)
- ReverseHorizontal(*E);
- }
- else if (EStart->Bot.X != E->Bot.X)
- ReverseHorizontal(*E);
- }
-
- EStart = E;
- if (NextIsForward)
- {
- while (Result->Top.Y == Result->Next->Bot.Y && Result->Next->OutIdx != Skip)
- Result = Result->Next;
- if (IsHorizontal(*Result) && Result->Next->OutIdx != Skip)
- {
- //nb: at the top of a bound, horizontals are added to the bound
- //only when the preceding edge attaches to the horizontal's left vertex
- //unless a Skip edge is encountered when that becomes the top divide
- Horz = Result;
- while (IsHorizontal(*Horz->Prev)) Horz = Horz->Prev;
- if (Horz->Prev->Top.X > Result->Next->Top.X) Result = Horz->Prev;
- }
- while (E != Result)
- {
- E->NextInLML = E->Next;
- if (IsHorizontal(*E) && E != EStart &&
- E->Bot.X != E->Prev->Top.X) ReverseHorizontal(*E);
- E = E->Next;
- }
- if (IsHorizontal(*E) && E != EStart && E->Bot.X != E->Prev->Top.X)
- ReverseHorizontal(*E);
- Result = Result->Next; //move to the edge just beyond current bound
- } else
- {
- while (Result->Top.Y == Result->Prev->Bot.Y && Result->Prev->OutIdx != Skip)
- Result = Result->Prev;
- if (IsHorizontal(*Result) && Result->Prev->OutIdx != Skip)
- {
- Horz = Result;
- while (IsHorizontal(*Horz->Next)) Horz = Horz->Next;
- if (Horz->Next->Top.X == Result->Prev->Top.X ||
- Horz->Next->Top.X > Result->Prev->Top.X) Result = Horz->Next;
- }
-
- while (E != Result)
- {
- E->NextInLML = E->Prev;
- if (IsHorizontal(*E) && E != EStart && E->Bot.X != E->Next->Top.X)
- ReverseHorizontal(*E);
- E = E->Prev;
- }
- if (IsHorizontal(*E) && E != EStart && E->Bot.X != E->Next->Top.X)
- ReverseHorizontal(*E);
- Result = Result->Prev; //move to the edge just beyond current bound
- }
-
- return Result;
-}
-//------------------------------------------------------------------------------
-
-bool ClipperBase::AddPath(const Path &pg, PolyType PolyTyp, bool Closed)
-{
-#ifdef use_lines
- if (!Closed && PolyTyp == ptClip)
- throw clipperException("AddPath: Open paths must be subject.");
-#else
- if (!Closed)
- throw clipperException("AddPath: Open paths have been disabled.");
-#endif
-
- int highI = (int)pg.size() -1;
- if (Closed) while (highI > 0 && (pg[highI] == pg[0])) --highI;
- while (highI > 0 && (pg[highI] == pg[highI -1])) --highI;
- if ((Closed && highI < 2) || (!Closed && highI < 1)) return false;
-
- //create a new edge array ...
- TEdge *edges = new TEdge [highI +1];
-
- bool IsFlat = true;
- //1. Basic (first) edge initialization ...
- try
- {
- edges[1].Curr = pg[1];
- RangeTest(pg[0], m_UseFullRange);
- RangeTest(pg[highI], m_UseFullRange);
- InitEdge(&edges[0], &edges[1], &edges[highI], pg[0]);
- InitEdge(&edges[highI], &edges[0], &edges[highI-1], pg[highI]);
- for (int i = highI - 1; i >= 1; --i)
- {
- RangeTest(pg[i], m_UseFullRange);
- InitEdge(&edges[i], &edges[i+1], &edges[i-1], pg[i]);
- }
- }
- catch(...)
- {
- delete [] edges;
- throw; //range test fails
- }
- TEdge *eStart = &edges[0];
-
- //2. Remove duplicate vertices, and (when closed) collinear edges ...
- TEdge *E = eStart, *eLoopStop = eStart;
- for (;;)
- {
- //nb: allows matching start and end points when not Closed ...
- if (E->Curr == E->Next->Curr && (Closed || E->Next != eStart))
- {
- if (E == E->Next) break;
- if (E == eStart) eStart = E->Next;
- E = RemoveEdge(E);
- eLoopStop = E;
- continue;
- }
- if (E->Prev == E->Next)
- break; //only two vertices
- else if (Closed &&
- SlopesEqual(E->Prev->Curr, E->Curr, E->Next->Curr, m_UseFullRange) &&
- (!m_PreserveCollinear ||
- !Pt2IsBetweenPt1AndPt3(E->Prev->Curr, E->Curr, E->Next->Curr)))
- {
- //Collinear edges are allowed for open paths but in closed paths
- //the default is to merge adjacent collinear edges into a single edge.
- //However, if the PreserveCollinear property is enabled, only overlapping
- //collinear edges (ie spikes) will be removed from closed paths.
- if (E == eStart) eStart = E->Next;
- E = RemoveEdge(E);
- E = E->Prev;
- eLoopStop = E;
- continue;
- }
- E = E->Next;
- if ((E == eLoopStop) || (!Closed && E->Next == eStart)) break;
- }
-
- if ((!Closed && (E == E->Next)) || (Closed && (E->Prev == E->Next)))
- {
- delete [] edges;
- return false;
- }
-
- if (!Closed)
- {
- m_HasOpenPaths = true;
- eStart->Prev->OutIdx = Skip;
- }
-
- //3. Do second stage of edge initialization ...
- E = eStart;
- do
- {
- InitEdge2(*E, PolyTyp);
- E = E->Next;
- if (IsFlat && E->Curr.Y != eStart->Curr.Y) IsFlat = false;
- }
- while (E != eStart);
-
- //4. Finally, add edge bounds to LocalMinima list ...
-
- //Totally flat paths must be handled differently when adding them
- //to LocalMinima list to avoid endless loops etc ...
- if (IsFlat)
- {
- if (Closed)
- {
- delete [] edges;
- return false;
- }
- E->Prev->OutIdx = Skip;
- MinimaList::value_type locMin;
- locMin.Y = E->Bot.Y;
- locMin.LeftBound = 0;
- locMin.RightBound = E;
- locMin.RightBound->Side = esRight;
- locMin.RightBound->WindDelta = 0;
- for (;;)
- {
- if (E->Bot.X != E->Prev->Top.X) ReverseHorizontal(*E);
- if (E->Next->OutIdx == Skip) break;
- E->NextInLML = E->Next;
- E = E->Next;
- }
- m_MinimaList.push_back(locMin);
- m_edges.push_back(edges);
- return true;
- }
-
- m_edges.push_back(edges);
- bool leftBoundIsForward;
- TEdge* EMin = 0;
-
- //workaround to avoid an endless loop in the loop below when
- //open paths have matching start and end points ...
- if (E->Prev->Bot == E->Prev->Top) E = E->Next;
-
- for (;;)
- {
- E = FindNextLocMin(E);
- if (E == EMin) break;
- else if (!EMin) EMin = E;
-
- //E and E.Prev now share a local minimum (left aligned if horizontal).
- //Compare their slopes to find which starts which bound ...
- MinimaList::value_type locMin;
- locMin.Y = E->Bot.Y;
- if (E->Dx < E->Prev->Dx)
- {
- locMin.LeftBound = E->Prev;
- locMin.RightBound = E;
- leftBoundIsForward = false; //Q.nextInLML = Q.prev
- } else
- {
- locMin.LeftBound = E;
- locMin.RightBound = E->Prev;
- leftBoundIsForward = true; //Q.nextInLML = Q.next
- }
-
- if (!Closed) locMin.LeftBound->WindDelta = 0;
- else if (locMin.LeftBound->Next == locMin.RightBound)
- locMin.LeftBound->WindDelta = -1;
- else locMin.LeftBound->WindDelta = 1;
- locMin.RightBound->WindDelta = -locMin.LeftBound->WindDelta;
-
- E = ProcessBound(locMin.LeftBound, leftBoundIsForward);
- if (E->OutIdx == Skip) E = ProcessBound(E, leftBoundIsForward);
-
- TEdge* E2 = ProcessBound(locMin.RightBound, !leftBoundIsForward);
- if (E2->OutIdx == Skip) E2 = ProcessBound(E2, !leftBoundIsForward);
-
- if (locMin.LeftBound->OutIdx == Skip)
- locMin.LeftBound = 0;
- else if (locMin.RightBound->OutIdx == Skip)
- locMin.RightBound = 0;
- m_MinimaList.push_back(locMin);
- if (!leftBoundIsForward) E = E2;
- }
- return true;
-}
-//------------------------------------------------------------------------------
-
-bool ClipperBase::AddPaths(const Paths &ppg, PolyType PolyTyp, bool Closed)
-{
- bool result = false;
- for (Paths::size_type i = 0; i < ppg.size(); ++i)
- if (AddPath(ppg[i], PolyTyp, Closed)) result = true;
- return result;
-}
-//------------------------------------------------------------------------------
-
-void ClipperBase::Clear()
-{
- DisposeLocalMinimaList();
- for (EdgeList::size_type i = 0; i < m_edges.size(); ++i)
- {
- TEdge* edges = m_edges[i];
- delete [] edges;
- }
- m_edges.clear();
- m_UseFullRange = false;
- m_HasOpenPaths = false;
-}
-//------------------------------------------------------------------------------
-
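-//Reset prepares for a new Execute() pass: local minima are sorted by Y, each
-//bound's edges are restored to their starting state, and the scanbeam
-//priority queue is re-seeded with every local minimum's Y coordinate.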
-void ClipperBase::Reset()
-{
- m_CurrentLM = m_MinimaList.begin();
- if (m_CurrentLM == m_MinimaList.end()) return; //ie nothing to process
- std::sort(m_MinimaList.begin(), m_MinimaList.end(), LocMinSorter());
-
- m_Scanbeam = ScanbeamList(); //clears/resets priority_queue
- //reset all edges ...
- for (MinimaList::iterator lm = m_MinimaList.begin(); lm != m_MinimaList.end(); ++lm)
- {
- InsertScanbeam(lm->Y);
- TEdge* e = lm->LeftBound;
- if (e)
- {
- e->Curr = e->Bot;
- e->Side = esLeft;
- e->OutIdx = Unassigned;
- }
-
- e = lm->RightBound;
- if (e)
- {
- e->Curr = e->Bot;
- e->Side = esRight;
- e->OutIdx = Unassigned;
- }
- }
- m_ActiveEdges = 0;
- m_CurrentLM = m_MinimaList.begin();
-}
-//------------------------------------------------------------------------------
-
-void ClipperBase::DisposeLocalMinimaList()
-{
- m_MinimaList.clear();
- m_CurrentLM = m_MinimaList.begin();
-}
-//------------------------------------------------------------------------------
-
-bool ClipperBase::PopLocalMinima(cInt Y, const LocalMinimum *&locMin)
-{
- if (m_CurrentLM == m_MinimaList.end() || (*m_CurrentLM).Y != Y) return false;
- locMin = &(*m_CurrentLM);
- ++m_CurrentLM;
- return true;
-}
-//------------------------------------------------------------------------------
-
-IntRect ClipperBase::GetBounds()
-{
- IntRect result;
- MinimaList::iterator lm = m_MinimaList.begin();
- if (lm == m_MinimaList.end())
- {
- result.left = result.top = result.right = result.bottom = 0;
- return result;
- }
- result.left = lm->LeftBound->Bot.X;
- result.top = lm->LeftBound->Bot.Y;
- result.right = lm->LeftBound->Bot.X;
- result.bottom = lm->LeftBound->Bot.Y;
- while (lm != m_MinimaList.end())
- {
- //todo - needs fixing for open paths
- result.bottom = std::max(result.bottom, lm->LeftBound->Bot.Y);
- TEdge* e = lm->LeftBound;
- for (;;) {
- TEdge* bottomE = e;
- while (e->NextInLML)
- {
- if (e->Bot.X < result.left) result.left = e->Bot.X;
- if (e->Bot.X > result.right) result.right = e->Bot.X;
- e = e->NextInLML;
- }
- result.left = std::min(result.left, e->Bot.X);
- result.right = std::max(result.right, e->Bot.X);
- result.left = std::min(result.left, e->Top.X);
- result.right = std::max(result.right, e->Top.X);
- result.top = std::min(result.top, e->Top.Y);
- if (bottomE == lm->LeftBound) e = lm->RightBound;
- else break;
- }
- ++lm;
- }
- return result;
-}
-//------------------------------------------------------------------------------
-
-void ClipperBase::InsertScanbeam(const cInt Y)
-{
- m_Scanbeam.push(Y);
-}
-//------------------------------------------------------------------------------
-
-bool ClipperBase::PopScanbeam(cInt &Y)
-{
- if (m_Scanbeam.empty()) return false;
- Y = m_Scanbeam.top();
- m_Scanbeam.pop();
- while (!m_Scanbeam.empty() && Y == m_Scanbeam.top()) { m_Scanbeam.pop(); } // Pop duplicates.
- return true;
-}
-//------------------------------------------------------------------------------
-
-void ClipperBase::DisposeAllOutRecs()
-{
- for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i)
- DisposeOutRec(i);
- m_PolyOuts.clear();
-}
-//------------------------------------------------------------------------------
-
-void ClipperBase::DisposeOutRec(PolyOutList::size_type index)
-{
- OutRec *outRec = m_PolyOuts[index];
- if (outRec->Pts) DisposeOutPts(outRec->Pts);
- delete outRec;
- m_PolyOuts[index] = 0;
-}
-//------------------------------------------------------------------------------
-
-void ClipperBase::DeleteFromAEL(TEdge *e)
-{
- TEdge* AelPrev = e->PrevInAEL;
- TEdge* AelNext = e->NextInAEL;
- if (!AelPrev && !AelNext && (e != m_ActiveEdges)) return; //already deleted
- if (AelPrev) AelPrev->NextInAEL = AelNext;
- else m_ActiveEdges = AelNext;
- if (AelNext) AelNext->PrevInAEL = AelPrev;
- e->NextInAEL = 0;
- e->PrevInAEL = 0;
-}
-//------------------------------------------------------------------------------
-
-OutRec* ClipperBase::CreateOutRec()
-{
- OutRec* result = new OutRec;
- result->IsHole = false;
- result->IsOpen = false;
- result->FirstLeft = 0;
- result->Pts = 0;
- result->BottomPt = 0;
- result->PolyNd = 0;
- m_PolyOuts.push_back(result);
- result->Idx = (int)m_PolyOuts.size() - 1;
- return result;
-}
-//------------------------------------------------------------------------------
-
-void ClipperBase::SwapPositionsInAEL(TEdge *Edge1, TEdge *Edge2)
-{
- //check that neither edge has already been removed from the AEL ...
- if (Edge1->NextInAEL == Edge1->PrevInAEL ||
- Edge2->NextInAEL == Edge2->PrevInAEL) return;
-
- if (Edge1->NextInAEL == Edge2)
- {
- TEdge* Next = Edge2->NextInAEL;
- if (Next) Next->PrevInAEL = Edge1;
- TEdge* Prev = Edge1->PrevInAEL;
- if (Prev) Prev->NextInAEL = Edge2;
- Edge2->PrevInAEL = Prev;
- Edge2->NextInAEL = Edge1;
- Edge1->PrevInAEL = Edge2;
- Edge1->NextInAEL = Next;
- }
- else if (Edge2->NextInAEL == Edge1)
- {
- TEdge* Next = Edge1->NextInAEL;
- if (Next) Next->PrevInAEL = Edge2;
- TEdge* Prev = Edge2->PrevInAEL;
- if (Prev) Prev->NextInAEL = Edge1;
- Edge1->PrevInAEL = Prev;
- Edge1->NextInAEL = Edge2;
- Edge2->PrevInAEL = Edge1;
- Edge2->NextInAEL = Next;
- }
- else
- {
- TEdge* Next = Edge1->NextInAEL;
- TEdge* Prev = Edge1->PrevInAEL;
- Edge1->NextInAEL = Edge2->NextInAEL;
- if (Edge1->NextInAEL) Edge1->NextInAEL->PrevInAEL = Edge1;
- Edge1->PrevInAEL = Edge2->PrevInAEL;
- if (Edge1->PrevInAEL) Edge1->PrevInAEL->NextInAEL = Edge1;
- Edge2->NextInAEL = Next;
- if (Edge2->NextInAEL) Edge2->NextInAEL->PrevInAEL = Edge2;
- Edge2->PrevInAEL = Prev;
- if (Edge2->PrevInAEL) Edge2->PrevInAEL->NextInAEL = Edge2;
- }
-
- if (!Edge1->PrevInAEL) m_ActiveEdges = Edge1;
- else if (!Edge2->PrevInAEL) m_ActiveEdges = Edge2;
-}
-//------------------------------------------------------------------------------
-
-void ClipperBase::UpdateEdgeIntoAEL(TEdge *&e)
-{
- if (!e->NextInLML)
- throw clipperException("UpdateEdgeIntoAEL: invalid call");
-
- e->NextInLML->OutIdx = e->OutIdx;
- TEdge* AelPrev = e->PrevInAEL;
- TEdge* AelNext = e->NextInAEL;
- if (AelPrev) AelPrev->NextInAEL = e->NextInLML;
- else m_ActiveEdges = e->NextInLML;
- if (AelNext) AelNext->PrevInAEL = e->NextInLML;
- e->NextInLML->Side = e->Side;
- e->NextInLML->WindDelta = e->WindDelta;
- e->NextInLML->WindCnt = e->WindCnt;
- e->NextInLML->WindCnt2 = e->WindCnt2;
- e = e->NextInLML;
- e->Curr = e->Bot;
- e->PrevInAEL = AelPrev;
- e->NextInAEL = AelNext;
- if (!IsHorizontal(*e)) InsertScanbeam(e->Top.Y);
-}
-//------------------------------------------------------------------------------
-
-bool ClipperBase::LocalMinimaPending()
-{
- return (m_CurrentLM != m_MinimaList.end());
-}
-
-//------------------------------------------------------------------------------
-// TClipper methods ...
-//------------------------------------------------------------------------------
-
-Clipper::Clipper(int initOptions) : ClipperBase() //constructor
-{
- m_ExecuteLocked = false;
- m_UseFullRange = false;
- m_ReverseOutput = ((initOptions & ioReverseSolution) != 0);
- m_StrictSimple = ((initOptions & ioStrictlySimple) != 0);
- m_PreserveCollinear = ((initOptions & ioPreserveCollinear) != 0);
- m_HasOpenPaths = false;
-#ifdef use_xyz
- m_ZFill = 0;
-#endif
-}
-//------------------------------------------------------------------------------
-
-#ifdef use_xyz
-void Clipper::ZFillFunction(ZFillCallback zFillFunc)
-{
- m_ZFill = zFillFunc;
-}
-//------------------------------------------------------------------------------
-#endif
-
-bool Clipper::Execute(ClipType clipType, Paths &solution, PolyFillType fillType)
-{
- return Execute(clipType, solution, fillType, fillType);
-}
-//------------------------------------------------------------------------------
-
-bool Clipper::Execute(ClipType clipType, PolyTree &polytree, PolyFillType fillType)
-{
- return Execute(clipType, polytree, fillType, fillType);
-}
-//------------------------------------------------------------------------------
-
-bool Clipper::Execute(ClipType clipType, Paths &solution,
- PolyFillType subjFillType, PolyFillType clipFillType)
-{
- if( m_ExecuteLocked ) return false;
- if (m_HasOpenPaths)
- throw clipperException("Error: PolyTree struct is needed for open path clipping.");
- m_ExecuteLocked = true;
- solution.resize(0);
- m_SubjFillType = subjFillType;
- m_ClipFillType = clipFillType;
- m_ClipType = clipType;
- m_UsingPolyTree = false;
- bool succeeded = ExecuteInternal();
- if (succeeded) BuildResult(solution);
- DisposeAllOutRecs();
- m_ExecuteLocked = false;
- return succeeded;
-}
-//------------------------------------------------------------------------------
-
-bool Clipper::Execute(ClipType clipType, PolyTree& polytree,
- PolyFillType subjFillType, PolyFillType clipFillType)
-{
- if( m_ExecuteLocked ) return false;
- m_ExecuteLocked = true;
- m_SubjFillType = subjFillType;
- m_ClipFillType = clipFillType;
- m_ClipType = clipType;
- m_UsingPolyTree = true;
- bool succeeded = ExecuteInternal();
- if (succeeded) BuildResult2(polytree);
- DisposeAllOutRecs();
- m_ExecuteLocked = false;
- return succeeded;
-}
-//------------------------------------------------------------------------------
-
-void Clipper::FixHoleLinkage(OutRec &outrec)
-{
- //skip OutRecs that (a) contain outermost polygons or
- //(b) already have the correct owner/child linkage ...
- if (!outrec.FirstLeft ||
- (outrec.IsHole != outrec.FirstLeft->IsHole &&
- outrec.FirstLeft->Pts)) return;
-
- OutRec* orfl = outrec.FirstLeft;
- while (orfl && ((orfl->IsHole == outrec.IsHole) || !orfl->Pts))
- orfl = orfl->FirstLeft;
- outrec.FirstLeft = orfl;
-}
-//------------------------------------------------------------------------------
-
-bool Clipper::ExecuteInternal()
-{
- bool succeeded = true;
- try {
- Reset();
- m_Maxima = MaximaList();
- m_SortedEdges = 0;
-
- succeeded = true;
- cInt botY, topY;
- if (!PopScanbeam(botY)) return false;
- InsertLocalMinimaIntoAEL(botY);
- while (PopScanbeam(topY) || LocalMinimaPending())
- {
- ProcessHorizontals();
- ClearGhostJoins();
- if (!ProcessIntersections(topY))
- {
- succeeded = false;
- break;
- }
- ProcessEdgesAtTopOfScanbeam(topY);
- botY = topY;
- InsertLocalMinimaIntoAEL(botY);
- }
- }
- catch(...)
- {
- succeeded = false;
- }
-
- if (succeeded)
- {
- //fix orientations ...
- for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i)
- {
- OutRec *outRec = m_PolyOuts[i];
- if (!outRec->Pts || outRec->IsOpen) continue;
- if ((outRec->IsHole ^ m_ReverseOutput) == (Area(*outRec) > 0))
- ReversePolyPtLinks(outRec->Pts);
- }
-
- if (!m_Joins.empty()) JoinCommonEdges();
-
- //unfortunately FixupOutPolygon() must be done after JoinCommonEdges()
- for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i)
- {
- OutRec *outRec = m_PolyOuts[i];
- if (!outRec->Pts) continue;
- if (outRec->IsOpen)
- FixupOutPolyline(*outRec);
- else
- FixupOutPolygon(*outRec);
- }
-
- if (m_StrictSimple) DoSimplePolygons();
- }
-
- ClearJoins();
- ClearGhostJoins();
- return succeeded;
-}
-//------------------------------------------------------------------------------
-
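-//SetWindingCount derives edge.WindCnt (the winding count of the edge's own
-//polytype) and edge.WindCnt2 (the winding count of the opposite polytype)
-//from the nearest same-polytype edge to its left in the AEL.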
-void Clipper::SetWindingCount(TEdge &edge)
-{
- TEdge *e = edge.PrevInAEL;
- //find the edge of the same polytype that immediately precedes 'edge' in AEL
- while (e && ((e->PolyTyp != edge.PolyTyp) || (e->WindDelta == 0))) e = e->PrevInAEL;
- if (!e)
- {
- if (edge.WindDelta == 0)
- {
- PolyFillType pft = (edge.PolyTyp == ptSubject ? m_SubjFillType : m_ClipFillType);
- edge.WindCnt = (pft == pftNegative ? -1 : 1);
- }
- else
- edge.WindCnt = edge.WindDelta;
- edge.WindCnt2 = 0;
- e = m_ActiveEdges; //ie get ready to calc WindCnt2
- }
- else if (edge.WindDelta == 0 && m_ClipType != ctUnion)
- {
- edge.WindCnt = 1;
- edge.WindCnt2 = e->WindCnt2;
- e = e->NextInAEL; //ie get ready to calc WindCnt2
- }
- else if (IsEvenOddFillType(edge))
- {
- //EvenOdd filling ...
- if (edge.WindDelta == 0)
- {
- //are we inside a subj polygon ...
- bool Inside = true;
- TEdge *e2 = e->PrevInAEL;
- while (e2)
- {
- if (e2->PolyTyp == e->PolyTyp && e2->WindDelta != 0)
- Inside = !Inside;
- e2 = e2->PrevInAEL;
- }
- edge.WindCnt = (Inside ? 0 : 1);
- }
- else
- {
- edge.WindCnt = edge.WindDelta;
- }
- edge.WindCnt2 = e->WindCnt2;
- e = e->NextInAEL; //ie get ready to calc WindCnt2
- }
- else
- {
- //nonZero, Positive or Negative filling ...
- if (e->WindCnt * e->WindDelta < 0)
- {
- //prev edge is 'decreasing' WindCount (WC) toward zero
- //so we're outside the previous polygon ...
- if (Abs(e->WindCnt) > 1)
- {
- //outside prev poly but still inside another.
- //when reversing direction of prev poly use the same WC
- if (e->WindDelta * edge.WindDelta < 0) edge.WindCnt = e->WindCnt;
- //otherwise continue to 'decrease' WC ...
- else edge.WindCnt = e->WindCnt + edge.WindDelta;
- }
- else
- //now outside all polys of same polytype so set own WC ...
- edge.WindCnt = (edge.WindDelta == 0 ? 1 : edge.WindDelta);
- } else
- {
- //prev edge is 'increasing' WindCount (WC) away from zero
- //so we're inside the previous polygon ...
- if (edge.WindDelta == 0)
- edge.WindCnt = (e->WindCnt < 0 ? e->WindCnt - 1 : e->WindCnt + 1);
- //if wind direction is reversing prev then use same WC
- else if (e->WindDelta * edge.WindDelta < 0) edge.WindCnt = e->WindCnt;
- //otherwise add to WC ...
- else edge.WindCnt = e->WindCnt + edge.WindDelta;
- }
- edge.WindCnt2 = e->WindCnt2;
- e = e->NextInAEL; //ie get ready to calc WindCnt2
- }
-
- //update WindCnt2 ...
- if (IsEvenOddAltFillType(edge))
- {
- //EvenOdd filling ...
- while (e != &edge)
- {
- if (e->WindDelta != 0)
- edge.WindCnt2 = (edge.WindCnt2 == 0 ? 1 : 0);
- e = e->NextInAEL;
- }
- } else
- {
- //nonZero, Positive or Negative filling ...
- while ( e != &edge )
- {
- edge.WindCnt2 += e->WindDelta;
- e = e->NextInAEL;
- }
- }
-}
-//------------------------------------------------------------------------------
-
-bool Clipper::IsEvenOddFillType(const TEdge& edge) const
-{
- if (edge.PolyTyp == ptSubject)
- return m_SubjFillType == pftEvenOdd;
- else
- return m_ClipFillType == pftEvenOdd;
-}
-//------------------------------------------------------------------------------
-
-bool Clipper::IsEvenOddAltFillType(const TEdge& edge) const
-{
- if (edge.PolyTyp == ptSubject)
- return m_ClipFillType == pftEvenOdd;
- else
- return m_SubjFillType == pftEvenOdd;
-}
-//------------------------------------------------------------------------------
-
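-//IsContributing applies the subject and clip fill rules to the edge's
-//winding counts to decide whether the edge contributes to the output of the
-//current clip operation (intersection, union, difference or xor).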
-bool Clipper::IsContributing(const TEdge& edge) const
-{
- PolyFillType pft, pft2;
- if (edge.PolyTyp == ptSubject)
- {
- pft = m_SubjFillType;
- pft2 = m_ClipFillType;
- } else
- {
- pft = m_ClipFillType;
- pft2 = m_SubjFillType;
- }
-
- switch(pft)
- {
- case pftEvenOdd:
- //return false if a subj line has been flagged as inside a subj polygon
- if (edge.WindDelta == 0 && edge.WindCnt != 1) return false;
- break;
- case pftNonZero:
- if (Abs(edge.WindCnt) != 1) return false;
- break;
- case pftPositive:
- if (edge.WindCnt != 1) return false;
- break;
- default: //pftNegative
- if (edge.WindCnt != -1) return false;
- }
-
- switch(m_ClipType)
- {
- case ctIntersection:
- switch(pft2)
- {
- case pftEvenOdd:
- case pftNonZero:
- return (edge.WindCnt2 != 0);
- case pftPositive:
- return (edge.WindCnt2 > 0);
- default:
- return (edge.WindCnt2 < 0);
- }
- break;
- case ctUnion:
- switch(pft2)
- {
- case pftEvenOdd:
- case pftNonZero:
- return (edge.WindCnt2 == 0);
- case pftPositive:
- return (edge.WindCnt2 <= 0);
- default:
- return (edge.WindCnt2 >= 0);
- }
- break;
- case ctDifference:
- if (edge.PolyTyp == ptSubject)
- switch(pft2)
- {
- case pftEvenOdd:
- case pftNonZero:
- return (edge.WindCnt2 == 0);
- case pftPositive:
- return (edge.WindCnt2 <= 0);
- default:
- return (edge.WindCnt2 >= 0);
- }
- else
- switch(pft2)
- {
- case pftEvenOdd:
- case pftNonZero:
- return (edge.WindCnt2 != 0);
- case pftPositive:
- return (edge.WindCnt2 > 0);
- default:
- return (edge.WindCnt2 < 0);
- }
- break;
- case ctXor:
- if (edge.WindDelta == 0) //XOr always contributing unless open
- switch(pft2)
- {
- case pftEvenOdd:
- case pftNonZero:
- return (edge.WindCnt2 == 0);
- case pftPositive:
- return (edge.WindCnt2 <= 0);
- default:
- return (edge.WindCnt2 >= 0);
- }
- else
- return true;
- break;
- default:
- return true;
- }
-}
-//------------------------------------------------------------------------------
-
-OutPt* Clipper::AddLocalMinPoly(TEdge *e1, TEdge *e2, const IntPoint &Pt)
-{
- OutPt* result;
- TEdge *e, *prevE;
- if (IsHorizontal(*e2) || ( e1->Dx > e2->Dx ))
- {
- result = AddOutPt(e1, Pt);
- e2->OutIdx = e1->OutIdx;
- e1->Side = esLeft;
- e2->Side = esRight;
- e = e1;
- if (e->PrevInAEL == e2)
- prevE = e2->PrevInAEL;
- else
- prevE = e->PrevInAEL;
- } else
- {
- result = AddOutPt(e2, Pt);
- e1->OutIdx = e2->OutIdx;
- e1->Side = esRight;
- e2->Side = esLeft;
- e = e2;
- if (e->PrevInAEL == e1)
- prevE = e1->PrevInAEL;
- else
- prevE = e->PrevInAEL;
- }
-
- if (prevE && prevE->OutIdx >= 0)
- {
- cInt xPrev = TopX(*prevE, Pt.Y);
- cInt xE = TopX(*e, Pt.Y);
- if (xPrev == xE && (e->WindDelta != 0) && (prevE->WindDelta != 0) &&
- SlopesEqual(IntPoint(xPrev, Pt.Y), prevE->Top, IntPoint(xE, Pt.Y), e->Top, m_UseFullRange))
- {
- OutPt* outPt = AddOutPt(prevE, Pt);
- AddJoin(result, outPt, e->Top);
- }
- }
- return result;
-}
-//------------------------------------------------------------------------------
-
-void Clipper::AddLocalMaxPoly(TEdge *e1, TEdge *e2, const IntPoint &Pt)
-{
- AddOutPt( e1, Pt );
- if (e2->WindDelta == 0) AddOutPt(e2, Pt);
- if( e1->OutIdx == e2->OutIdx )
- {
- e1->OutIdx = Unassigned;
- e2->OutIdx = Unassigned;
- }
- else if (e1->OutIdx < e2->OutIdx)
- AppendPolygon(e1, e2);
- else
- AppendPolygon(e2, e1);
-}
-//------------------------------------------------------------------------------
-
-void Clipper::AddEdgeToSEL(TEdge *edge)
-{
- //SEL pointers in TEdge are reused to build a list of horizontal edges.
- //However, we don't need to worry about order with horizontal edge processing.
- if( !m_SortedEdges )
- {
- m_SortedEdges = edge;
- edge->PrevInSEL = 0;
- edge->NextInSEL = 0;
- }
- else
- {
- edge->NextInSEL = m_SortedEdges;
- edge->PrevInSEL = 0;
- m_SortedEdges->PrevInSEL = edge;
- m_SortedEdges = edge;
- }
-}
-//------------------------------------------------------------------------------
-
-bool Clipper::PopEdgeFromSEL(TEdge *&edge)
-{
- if (!m_SortedEdges) return false;
- edge = m_SortedEdges;
- DeleteFromSEL(m_SortedEdges);
- return true;
-}
-//------------------------------------------------------------------------------
-
-void Clipper::CopyAELToSEL()
-{
- TEdge* e = m_ActiveEdges;
- m_SortedEdges = e;
- while ( e )
- {
- e->PrevInSEL = e->PrevInAEL;
- e->NextInSEL = e->NextInAEL;
- e = e->NextInAEL;
- }
-}
-//------------------------------------------------------------------------------
-
-void Clipper::AddJoin(OutPt *op1, OutPt *op2, const IntPoint OffPt)
-{
- Join* j = new Join;
- j->OutPt1 = op1;
- j->OutPt2 = op2;
- j->OffPt = OffPt;
- m_Joins.push_back(j);
-}
-//------------------------------------------------------------------------------
-
-void Clipper::ClearJoins()
-{
- for (JoinList::size_type i = 0; i < m_Joins.size(); i++)
- delete m_Joins[i];
- m_Joins.resize(0);
-}
-//------------------------------------------------------------------------------
-
-void Clipper::ClearGhostJoins()
-{
- for (JoinList::size_type i = 0; i < m_GhostJoins.size(); i++)
- delete m_GhostJoins[i];
- m_GhostJoins.resize(0);
-}
-//------------------------------------------------------------------------------
-
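-//'Ghost' joins record an output point on a horizontal edge so that, if an
-//overlapping horizontal is encountered later in the same scanbeam, the pair
-//can be converted into a real join (see InsertLocalMinimaIntoAEL below).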
-void Clipper::AddGhostJoin(OutPt *op, const IntPoint OffPt)
-{
- Join* j = new Join;
- j->OutPt1 = op;
- j->OutPt2 = 0;
- j->OffPt = OffPt;
- m_GhostJoins.push_back(j);
-}
-//------------------------------------------------------------------------------
-
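-//InsertLocalMinimaIntoAEL pops every local minimum at botY, inserts its left
-//and right bounds into the AEL, sets their winding counts, and emits output
-//points and joins wherever the new bounds are contributing.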
-void Clipper::InsertLocalMinimaIntoAEL(const cInt botY)
-{
- const LocalMinimum *lm;
- while (PopLocalMinima(botY, lm))
- {
- TEdge* lb = lm->LeftBound;
- TEdge* rb = lm->RightBound;
-
- OutPt *Op1 = 0;
- if (!lb)
- {
- //nb: don't insert LB into either AEL or SEL
- InsertEdgeIntoAEL(rb, 0);
- SetWindingCount(*rb);
- if (IsContributing(*rb))
- Op1 = AddOutPt(rb, rb->Bot);
- }
- else if (!rb)
- {
- InsertEdgeIntoAEL(lb, 0);
- SetWindingCount(*lb);
- if (IsContributing(*lb))
- Op1 = AddOutPt(lb, lb->Bot);
- InsertScanbeam(lb->Top.Y);
- }
- else
- {
- InsertEdgeIntoAEL(lb, 0);
- InsertEdgeIntoAEL(rb, lb);
- SetWindingCount( *lb );
- rb->WindCnt = lb->WindCnt;
- rb->WindCnt2 = lb->WindCnt2;
- if (IsContributing(*lb))
- Op1 = AddLocalMinPoly(lb, rb, lb->Bot);
- InsertScanbeam(lb->Top.Y);
- }
-
- if (rb)
- {
- if (IsHorizontal(*rb))
- {
- AddEdgeToSEL(rb);
- if (rb->NextInLML)
- InsertScanbeam(rb->NextInLML->Top.Y);
- }
- else InsertScanbeam( rb->Top.Y );
- }
-
- if (!lb || !rb) continue;
-
- //if any output polygons share an edge, they'll need joining later ...
- if (Op1 && IsHorizontal(*rb) &&
- m_GhostJoins.size() > 0 && (rb->WindDelta != 0))
- {
- for (JoinList::size_type i = 0; i < m_GhostJoins.size(); ++i)
- {
- Join* jr = m_GhostJoins[i];
- //if the horizontal Rb and a 'ghost' horizontal overlap, then convert
- //the 'ghost' join to a real join ready for later ...
- if (HorzSegmentsOverlap(jr->OutPt1->Pt.X, jr->OffPt.X, rb->Bot.X, rb->Top.X))
- AddJoin(jr->OutPt1, Op1, jr->OffPt);
- }
- }
-
- if (lb->OutIdx >= 0 && lb->PrevInAEL &&
- lb->PrevInAEL->Curr.X == lb->Bot.X &&
- lb->PrevInAEL->OutIdx >= 0 &&
- SlopesEqual(lb->PrevInAEL->Bot, lb->PrevInAEL->Top, lb->Curr, lb->Top, m_UseFullRange) &&
- (lb->WindDelta != 0) && (lb->PrevInAEL->WindDelta != 0))
- {
- OutPt *Op2 = AddOutPt(lb->PrevInAEL, lb->Bot);
- AddJoin(Op1, Op2, lb->Top);
- }
-
- if(lb->NextInAEL != rb)
- {
-
- if (rb->OutIdx >= 0 && rb->PrevInAEL->OutIdx >= 0 &&
- SlopesEqual(rb->PrevInAEL->Curr, rb->PrevInAEL->Top, rb->Curr, rb->Top, m_UseFullRange) &&
- (rb->WindDelta != 0) && (rb->PrevInAEL->WindDelta != 0))
- {
- OutPt *Op2 = AddOutPt(rb->PrevInAEL, rb->Bot);
- AddJoin(Op1, Op2, rb->Top);
- }
-
- TEdge* e = lb->NextInAEL;
- if (e)
- {
- while( e != rb )
- {
- //nb: For calculating winding counts etc, IntersectEdges() assumes
- //that param1 will be to the Right of param2 ABOVE the intersection ...
- IntersectEdges(rb , e , lb->Curr); //order important here
- e = e->NextInAEL;
- }
- }
- }
-
- }
-}
-//------------------------------------------------------------------------------
-
-void Clipper::DeleteFromSEL(TEdge *e)
-{
- TEdge* SelPrev = e->PrevInSEL;
- TEdge* SelNext = e->NextInSEL;
- if( !SelPrev && !SelNext && (e != m_SortedEdges) ) return; //already deleted
- if( SelPrev ) SelPrev->NextInSEL = SelNext;
- else m_SortedEdges = SelNext;
- if( SelNext ) SelNext->PrevInSEL = SelPrev;
- e->NextInSEL = 0;
- e->PrevInSEL = 0;
-}
-//------------------------------------------------------------------------------
-
-#ifdef use_xyz
-void Clipper::SetZ(IntPoint& pt, TEdge& e1, TEdge& e2)
-{
- if (pt.Z != 0 || !m_ZFill) return;
- else if (pt == e1.Bot) pt.Z = e1.Bot.Z;
- else if (pt == e1.Top) pt.Z = e1.Top.Z;
- else if (pt == e2.Bot) pt.Z = e2.Bot.Z;
- else if (pt == e2.Top) pt.Z = e2.Top.Z;
- else (*m_ZFill)(e1.Bot, e1.Top, e2.Bot, e2.Top, pt);
-}
-//------------------------------------------------------------------------------
-#endif
-
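-//IntersectEdges is called whenever two edges cross or touch. It updates both
-//edges' winding counts and, depending on the fill rules and the current clip
-//operation, emits local minimum/maximum output points and swaps the edges'
-//sides and output polygon indexes.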
-void Clipper::IntersectEdges(TEdge *e1, TEdge *e2, IntPoint &Pt)
-{
- bool e1Contributing = ( e1->OutIdx >= 0 );
- bool e2Contributing = ( e2->OutIdx >= 0 );
-
-#ifdef use_xyz
- SetZ(Pt, *e1, *e2);
-#endif
-
-#ifdef use_lines
- //if either edge is on an OPEN path ...
- if (e1->WindDelta == 0 || e2->WindDelta == 0)
- {
- //ignore subject-subject open path intersections UNLESS they
- //are both open paths, AND they are both 'contributing maxima' ...
- if (e1->WindDelta == 0 && e2->WindDelta == 0) return;
-
- //if intersecting a subj line with a subj poly ...
- else if (e1->PolyTyp == e2->PolyTyp &&
- e1->WindDelta != e2->WindDelta && m_ClipType == ctUnion)
- {
- if (e1->WindDelta == 0)
- {
- if (e2Contributing)
- {
- AddOutPt(e1, Pt);
- if (e1Contributing) e1->OutIdx = Unassigned;
- }
- }
- else
- {
- if (e1Contributing)
- {
- AddOutPt(e2, Pt);
- if (e2Contributing) e2->OutIdx = Unassigned;
- }
- }
- }
- else if (e1->PolyTyp != e2->PolyTyp)
- {
- //toggle subj open path OutIdx on/off when Abs(clip.WndCnt) == 1 ...
- if ((e1->WindDelta == 0) && abs(e2->WindCnt) == 1 &&
- (m_ClipType != ctUnion || e2->WindCnt2 == 0))
- {
- AddOutPt(e1, Pt);
- if (e1Contributing) e1->OutIdx = Unassigned;
- }
- else if ((e2->WindDelta == 0) && (abs(e1->WindCnt) == 1) &&
- (m_ClipType != ctUnion || e1->WindCnt2 == 0))
- {
- AddOutPt(e2, Pt);
- if (e2Contributing) e2->OutIdx = Unassigned;
- }
- }
- return;
- }
-#endif
-
- //update winding counts...
- //assumes that e1 will be to the Right of e2 ABOVE the intersection
- if ( e1->PolyTyp == e2->PolyTyp )
- {
- if ( IsEvenOddFillType( *e1) )
- {
- int oldE1WindCnt = e1->WindCnt;
- e1->WindCnt = e2->WindCnt;
- e2->WindCnt = oldE1WindCnt;
- } else
- {
- if (e1->WindCnt + e2->WindDelta == 0 ) e1->WindCnt = -e1->WindCnt;
- else e1->WindCnt += e2->WindDelta;
- if ( e2->WindCnt - e1->WindDelta == 0 ) e2->WindCnt = -e2->WindCnt;
- else e2->WindCnt -= e1->WindDelta;
- }
- } else
- {
- if (!IsEvenOddFillType(*e2)) e1->WindCnt2 += e2->WindDelta;
- else e1->WindCnt2 = ( e1->WindCnt2 == 0 ) ? 1 : 0;
- if (!IsEvenOddFillType(*e1)) e2->WindCnt2 -= e1->WindDelta;
- else e2->WindCnt2 = ( e2->WindCnt2 == 0 ) ? 1 : 0;
- }
-
- PolyFillType e1FillType, e2FillType, e1FillType2, e2FillType2;
- if (e1->PolyTyp == ptSubject)
- {
- e1FillType = m_SubjFillType;
- e1FillType2 = m_ClipFillType;
- } else
- {
- e1FillType = m_ClipFillType;
- e1FillType2 = m_SubjFillType;
- }
- if (e2->PolyTyp == ptSubject)
- {
- e2FillType = m_SubjFillType;
- e2FillType2 = m_ClipFillType;
- } else
- {
- e2FillType = m_ClipFillType;
- e2FillType2 = m_SubjFillType;
- }
-
- cInt e1Wc, e2Wc;
- switch (e1FillType)
- {
- case pftPositive: e1Wc = e1->WindCnt; break;
- case pftNegative: e1Wc = -e1->WindCnt; break;
- default: e1Wc = Abs(e1->WindCnt);
- }
- switch(e2FillType)
- {
- case pftPositive: e2Wc = e2->WindCnt; break;
- case pftNegative: e2Wc = -e2->WindCnt; break;
- default: e2Wc = Abs(e2->WindCnt);
- }
-
- if ( e1Contributing && e2Contributing )
- {
- if ((e1Wc != 0 && e1Wc != 1) || (e2Wc != 0 && e2Wc != 1) ||
- (e1->PolyTyp != e2->PolyTyp && m_ClipType != ctXor) )
- {
- AddLocalMaxPoly(e1, e2, Pt);
- }
- else
- {
- AddOutPt(e1, Pt);
- AddOutPt(e2, Pt);
- SwapSides( *e1 , *e2 );
- SwapPolyIndexes( *e1 , *e2 );
- }
- }
- else if ( e1Contributing )
- {
- if (e2Wc == 0 || e2Wc == 1)
- {
- AddOutPt(e1, Pt);
- SwapSides(*e1, *e2);
- SwapPolyIndexes(*e1, *e2);
- }
- }
- else if ( e2Contributing )
- {
- if (e1Wc == 0 || e1Wc == 1)
- {
- AddOutPt(e2, Pt);
- SwapSides(*e1, *e2);
- SwapPolyIndexes(*e1, *e2);
- }
- }
- else if ( (e1Wc == 0 || e1Wc == 1) && (e2Wc == 0 || e2Wc == 1))
- {
- //neither edge is currently contributing ...
-
- cInt e1Wc2, e2Wc2;
- switch (e1FillType2)
- {
- case pftPositive: e1Wc2 = e1->WindCnt2; break;
- case pftNegative : e1Wc2 = -e1->WindCnt2; break;
- default: e1Wc2 = Abs(e1->WindCnt2);
- }
- switch (e2FillType2)
- {
- case pftPositive: e2Wc2 = e2->WindCnt2; break;
- case pftNegative: e2Wc2 = -e2->WindCnt2; break;
- default: e2Wc2 = Abs(e2->WindCnt2);
- }
-
- if (e1->PolyTyp != e2->PolyTyp)
- {
- AddLocalMinPoly(e1, e2, Pt);
- }
- else if (e1Wc == 1 && e2Wc == 1)
- switch( m_ClipType ) {
- case ctIntersection:
- if (e1Wc2 > 0 && e2Wc2 > 0)
- AddLocalMinPoly(e1, e2, Pt);
- break;
- case ctUnion:
- if ( e1Wc2 <= 0 && e2Wc2 <= 0 )
- AddLocalMinPoly(e1, e2, Pt);
- break;
- case ctDifference:
- if (((e1->PolyTyp == ptClip) && (e1Wc2 > 0) && (e2Wc2 > 0)) ||
- ((e1->PolyTyp == ptSubject) && (e1Wc2 <= 0) && (e2Wc2 <= 0)))
- AddLocalMinPoly(e1, e2, Pt);
- break;
- case ctXor:
- AddLocalMinPoly(e1, e2, Pt);
- }
- else
- SwapSides( *e1, *e2 );
- }
-}
-//------------------------------------------------------------------------------
-
-void Clipper::SetHoleState(TEdge *e, OutRec *outrec)
-{
- TEdge *e2 = e->PrevInAEL;
- TEdge *eTmp = 0;
- while (e2)
- {
- if (e2->OutIdx >= 0 && e2->WindDelta != 0)
- {
- if (!eTmp) eTmp = e2;
- else if (eTmp->OutIdx == e2->OutIdx) eTmp = 0;
- }
- e2 = e2->PrevInAEL;
- }
- if (!eTmp)
- {
- outrec->FirstLeft = 0;
- outrec->IsHole = false;
- }
- else
- {
- outrec->FirstLeft = m_PolyOuts[eTmp->OutIdx];
- outrec->IsHole = !outrec->FirstLeft->IsHole;
- }
-}
-//------------------------------------------------------------------------------
-
-OutRec* GetLowermostRec(OutRec *outRec1, OutRec *outRec2)
-{
- //work out which polygon fragment has the correct hole state ...
- if (!outRec1->BottomPt)
- outRec1->BottomPt = GetBottomPt(outRec1->Pts);
- if (!outRec2->BottomPt)
- outRec2->BottomPt = GetBottomPt(outRec2->Pts);
- OutPt *OutPt1 = outRec1->BottomPt;
- OutPt *OutPt2 = outRec2->BottomPt;
- if (OutPt1->Pt.Y > OutPt2->Pt.Y) return outRec1;
- else if (OutPt1->Pt.Y < OutPt2->Pt.Y) return outRec2;
- else if (OutPt1->Pt.X < OutPt2->Pt.X) return outRec1;
- else if (OutPt1->Pt.X > OutPt2->Pt.X) return outRec2;
- else if (OutPt1->Next == OutPt1) return outRec2;
- else if (OutPt2->Next == OutPt2) return outRec1;
- else if (FirstIsBottomPt(OutPt1, OutPt2)) return outRec1;
- else return outRec2;
-}
-//------------------------------------------------------------------------------
-
-bool OutRec1RightOfOutRec2(OutRec* outRec1, OutRec* outRec2)
-{
- do
- {
- outRec1 = outRec1->FirstLeft;
- if (outRec1 == outRec2) return true;
- } while (outRec1);
- return false;
-}
-//------------------------------------------------------------------------------
-
-OutRec* Clipper::GetOutRec(int Idx)
-{
- OutRec* outrec = m_PolyOuts[Idx];
- while (outrec != m_PolyOuts[outrec->Idx])
- outrec = m_PolyOuts[outrec->Idx];
- return outrec;
-}
-//------------------------------------------------------------------------------
-
-void Clipper::AppendPolygon(TEdge *e1, TEdge *e2)
-{
- //get the start and ends of both output polygons ...
- OutRec *outRec1 = m_PolyOuts[e1->OutIdx];
- OutRec *outRec2 = m_PolyOuts[e2->OutIdx];
-
- OutRec *holeStateRec;
- if (OutRec1RightOfOutRec2(outRec1, outRec2))
- holeStateRec = outRec2;
- else if (OutRec1RightOfOutRec2(outRec2, outRec1))
- holeStateRec = outRec1;
- else
- holeStateRec = GetLowermostRec(outRec1, outRec2);
-
- OutPt* p1_lft = outRec1->Pts;
- OutPt* p1_rt = p1_lft->Prev;
- OutPt* p2_lft = outRec2->Pts;
- OutPt* p2_rt = p2_lft->Prev;
-
- //join e2 poly onto e1 poly and delete pointers to e2 ...
- if( e1->Side == esLeft )
- {
- if( e2->Side == esLeft )
- {
- //z y x a b c
- ReversePolyPtLinks(p2_lft);
- p2_lft->Next = p1_lft;
- p1_lft->Prev = p2_lft;
- p1_rt->Next = p2_rt;
- p2_rt->Prev = p1_rt;
- outRec1->Pts = p2_rt;
- } else
- {
- //x y z a b c
- p2_rt->Next = p1_lft;
- p1_lft->Prev = p2_rt;
- p2_lft->Prev = p1_rt;
- p1_rt->Next = p2_lft;
- outRec1->Pts = p2_lft;
- }
- } else
- {
- if( e2->Side == esRight )
- {
- //a b c z y x
- ReversePolyPtLinks(p2_lft);
- p1_rt->Next = p2_rt;
- p2_rt->Prev = p1_rt;
- p2_lft->Next = p1_lft;
- p1_lft->Prev = p2_lft;
- } else
- {
- //a b c x y z
- p1_rt->Next = p2_lft;
- p2_lft->Prev = p1_rt;
- p1_lft->Prev = p2_rt;
- p2_rt->Next = p1_lft;
- }
- }
-
- outRec1->BottomPt = 0;
- if (holeStateRec == outRec2)
- {
- if (outRec2->FirstLeft != outRec1)
- outRec1->FirstLeft = outRec2->FirstLeft;
- outRec1->IsHole = outRec2->IsHole;
- }
- outRec2->Pts = 0;
- outRec2->BottomPt = 0;
- outRec2->FirstLeft = outRec1;
-
- int OKIdx = e1->OutIdx;
- int ObsoleteIdx = e2->OutIdx;
-
- e1->OutIdx = Unassigned; //nb: safe because we only get here via AddLocalMaxPoly
- e2->OutIdx = Unassigned;
-
- TEdge* e = m_ActiveEdges;
- while( e )
- {
- if( e->OutIdx == ObsoleteIdx )
- {
- e->OutIdx = OKIdx;
- e->Side = e1->Side;
- break;
- }
- e = e->NextInAEL;
- }
-
- outRec2->Idx = outRec1->Idx;
-}
-//------------------------------------------------------------------------------
-
-OutPt* Clipper::AddOutPt(TEdge *e, const IntPoint &pt)
-{
- if( e->OutIdx < 0 )
- {
- OutRec *outRec = CreateOutRec();
- outRec->IsOpen = (e->WindDelta == 0);
- OutPt* newOp = new OutPt;
- outRec->Pts = newOp;
- newOp->Idx = outRec->Idx;
- newOp->Pt = pt;
- newOp->Next = newOp;
- newOp->Prev = newOp;
- if (!outRec->IsOpen)
- SetHoleState(e, outRec);
- e->OutIdx = outRec->Idx;
- return newOp;
- } else
- {
- OutRec *outRec = m_PolyOuts[e->OutIdx];
- //OutRec.Pts is the 'Left-most' point & OutRec.Pts.Prev is the 'Right-most'
- OutPt* op = outRec->Pts;
-
- bool ToFront = (e->Side == esLeft);
- if (ToFront && (pt == op->Pt)) return op;
- else if (!ToFront && (pt == op->Prev->Pt)) return op->Prev;
-
- OutPt* newOp = new OutPt;
- newOp->Idx = outRec->Idx;
- newOp->Pt = pt;
- newOp->Next = op;
- newOp->Prev = op->Prev;
- newOp->Prev->Next = newOp;
- op->Prev = newOp;
- if (ToFront) outRec->Pts = newOp;
- return newOp;
- }
-}
-//------------------------------------------------------------------------------
-
-OutPt* Clipper::GetLastOutPt(TEdge *e)
-{
- OutRec *outRec = m_PolyOuts[e->OutIdx];
- if (e->Side == esLeft)
- return outRec->Pts;
- else
- return outRec->Pts->Prev;
-}
-//------------------------------------------------------------------------------
-
-void Clipper::ProcessHorizontals()
-{
- TEdge* horzEdge;
- while (PopEdgeFromSEL(horzEdge))
- ProcessHorizontal(horzEdge);
-}
-//------------------------------------------------------------------------------
-
-inline bool IsMinima(TEdge *e)
-{
- return e && (e->Prev->NextInLML != e) && (e->Next->NextInLML != e);
-}
-//------------------------------------------------------------------------------
-
-inline bool IsMaxima(TEdge *e, const cInt Y)
-{
- return e && e->Top.Y == Y && !e->NextInLML;
-}
-//------------------------------------------------------------------------------
-
-inline bool IsIntermediate(TEdge *e, const cInt Y)
-{
- return e->Top.Y == Y && e->NextInLML;
-}
-//------------------------------------------------------------------------------
-
-TEdge *GetMaximaPair(TEdge *e)
-{
- if ((e->Next->Top == e->Top) && !e->Next->NextInLML)
- return e->Next;
- else if ((e->Prev->Top == e->Top) && !e->Prev->NextInLML)
- return e->Prev;
- else return 0;
-}
-//------------------------------------------------------------------------------
-
-TEdge *GetMaximaPairEx(TEdge *e)
-{
- //as GetMaximaPair() but returns 0 if MaxPair isn't in AEL (unless it's horizontal)
- TEdge* result = GetMaximaPair(e);
- if (result && (result->OutIdx == Skip ||
- (result->NextInAEL == result->PrevInAEL && !IsHorizontal(*result)))) return 0;
- return result;
-}
-//------------------------------------------------------------------------------
-
-void Clipper::SwapPositionsInSEL(TEdge *Edge1, TEdge *Edge2)
-{
- if( !( Edge1->NextInSEL ) && !( Edge1->PrevInSEL ) ) return;
- if( !( Edge2->NextInSEL ) && !( Edge2->PrevInSEL ) ) return;
-
- if( Edge1->NextInSEL == Edge2 )
- {
- TEdge* Next = Edge2->NextInSEL;
- if( Next ) Next->PrevInSEL = Edge1;
- TEdge* Prev = Edge1->PrevInSEL;
- if( Prev ) Prev->NextInSEL = Edge2;
- Edge2->PrevInSEL = Prev;
- Edge2->NextInSEL = Edge1;
- Edge1->PrevInSEL = Edge2;
- Edge1->NextInSEL = Next;
- }
- else if( Edge2->NextInSEL == Edge1 )
- {
- TEdge* Next = Edge1->NextInSEL;
- if( Next ) Next->PrevInSEL = Edge2;
- TEdge* Prev = Edge2->PrevInSEL;
- if( Prev ) Prev->NextInSEL = Edge1;
- Edge1->PrevInSEL = Prev;
- Edge1->NextInSEL = Edge2;
- Edge2->PrevInSEL = Edge1;
- Edge2->NextInSEL = Next;
- }
- else
- {
- TEdge* Next = Edge1->NextInSEL;
- TEdge* Prev = Edge1->PrevInSEL;
- Edge1->NextInSEL = Edge2->NextInSEL;
- if( Edge1->NextInSEL ) Edge1->NextInSEL->PrevInSEL = Edge1;
- Edge1->PrevInSEL = Edge2->PrevInSEL;
- if( Edge1->PrevInSEL ) Edge1->PrevInSEL->NextInSEL = Edge1;
- Edge2->NextInSEL = Next;
- if( Edge2->NextInSEL ) Edge2->NextInSEL->PrevInSEL = Edge2;
- Edge2->PrevInSEL = Prev;
- if( Edge2->PrevInSEL ) Edge2->PrevInSEL->NextInSEL = Edge2;
- }
-
- if( !Edge1->PrevInSEL ) m_SortedEdges = Edge1;
- else if( !Edge2->PrevInSEL ) m_SortedEdges = Edge2;
-}
-//------------------------------------------------------------------------------
-
-TEdge* GetNextInAEL(TEdge *e, Direction dir)
-{
- return dir == dLeftToRight ? e->NextInAEL : e->PrevInAEL;
-}
-//------------------------------------------------------------------------------
-
-void GetHorzDirection(TEdge& HorzEdge, Direction& Dir, cInt& Left, cInt& Right)
-{
- if (HorzEdge.Bot.X < HorzEdge.Top.X)
- {
- Left = HorzEdge.Bot.X;
- Right = HorzEdge.Top.X;
- Dir = dLeftToRight;
- } else
- {
- Left = HorzEdge.Top.X;
- Right = HorzEdge.Bot.X;
- Dir = dRightToLeft;
- }
-}
-//------------------------------------------------------------------------------
-
-/*******************************************************************************
-* Notes: Horizontal edges (HEs) at scanline intersections (ie at the Top or *
-* Bottom of a scanbeam) are processed as if layered. The order in which HEs *
-* are processed doesn't matter. HEs intersect with other HE Bot.Xs only [#] *
-* (or they could intersect with Top.Xs only, ie EITHER Bot.Xs OR Top.Xs), *
-* and with other non-horizontal edges [*]. Once these intersections are *
-* processed, intermediate HEs then 'promote' the Edge above (NextInLML) into *
-* the AEL. These 'promoted' edges may in turn intersect [%] with other HEs. *
-*******************************************************************************/
-
-void Clipper::ProcessHorizontal(TEdge *horzEdge)
-{
- Direction dir;
- cInt horzLeft, horzRight;
- bool IsOpen = (horzEdge->WindDelta == 0);
-
- GetHorzDirection(*horzEdge, dir, horzLeft, horzRight);
-
- TEdge* eLastHorz = horzEdge, *eMaxPair = 0;
- while (eLastHorz->NextInLML && IsHorizontal(*eLastHorz->NextInLML))
- eLastHorz = eLastHorz->NextInLML;
- if (!eLastHorz->NextInLML)
- eMaxPair = GetMaximaPair(eLastHorz);
-
- MaximaList::const_iterator maxIt;
- MaximaList::const_reverse_iterator maxRit;
- if (m_Maxima.size() > 0)
- {
- //get the first maxima in range (X) ...
- if (dir == dLeftToRight)
- {
- maxIt = m_Maxima.begin();
- while (maxIt != m_Maxima.end() && *maxIt <= horzEdge->Bot.X) maxIt++;
- if (maxIt != m_Maxima.end() && *maxIt >= eLastHorz->Top.X)
- maxIt = m_Maxima.end();
- }
- else
- {
- maxRit = m_Maxima.rbegin();
- while (maxRit != m_Maxima.rend() && *maxRit > horzEdge->Bot.X) maxRit++;
- if (maxRit != m_Maxima.rend() && *maxRit <= eLastHorz->Top.X)
- maxRit = m_Maxima.rend();
- }
- }
-
- OutPt* op1 = 0;
-
- for (;;) //loop through consec. horizontal edges
- {
-
- bool IsLastHorz = (horzEdge == eLastHorz);
- TEdge* e = GetNextInAEL(horzEdge, dir);
- while(e)
- {
-
- //this code block inserts extra coords into horizontal edges (in output
- //polygons) wherever maxima touch these horizontal edges. This helps
- //'simplifying' polygons (ie when the StrictlySimple property is set).
- if (m_Maxima.size() > 0)
- {
- if (dir == dLeftToRight)
- {
- while (maxIt != m_Maxima.end() && *maxIt < e->Curr.X)
- {
- if (horzEdge->OutIdx >= 0 && !IsOpen)
- AddOutPt(horzEdge, IntPoint(*maxIt, horzEdge->Bot.Y));
- maxIt++;
- }
- }
- else
- {
- while (maxRit != m_Maxima.rend() && *maxRit > e->Curr.X)
- {
- if (horzEdge->OutIdx >= 0 && !IsOpen)
- AddOutPt(horzEdge, IntPoint(*maxRit, horzEdge->Bot.Y));
- maxRit++;
- }
- }
- }
-
- if ((dir == dLeftToRight && e->Curr.X > horzRight) ||
- (dir == dRightToLeft && e->Curr.X < horzLeft)) break;
-
- //Also break if we've got to the end of an intermediate horizontal edge ...
- //nb: Smaller Dx's are to the right of larger Dx's ABOVE the horizontal.
- if (e->Curr.X == horzEdge->Top.X && horzEdge->NextInLML &&
- e->Dx < horzEdge->NextInLML->Dx) break;
-
- if (horzEdge->OutIdx >= 0 && !IsOpen) //note: may be done multiple times
- {
- op1 = AddOutPt(horzEdge, e->Curr);
- TEdge* eNextHorz = m_SortedEdges;
- while (eNextHorz)
- {
- if (eNextHorz->OutIdx >= 0 &&
- HorzSegmentsOverlap(horzEdge->Bot.X,
- horzEdge->Top.X, eNextHorz->Bot.X, eNextHorz->Top.X))
- {
- OutPt* op2 = GetLastOutPt(eNextHorz);
- AddJoin(op2, op1, eNextHorz->Top);
- }
- eNextHorz = eNextHorz->NextInSEL;
- }
- AddGhostJoin(op1, horzEdge->Bot);
- }
-
- //OK, so far we're still in range of the horizontal Edge but make sure
- //we're at the last of consec. horizontals when matching with eMaxPair
- if(e == eMaxPair && IsLastHorz)
- {
- if (horzEdge->OutIdx >= 0)
- AddLocalMaxPoly(horzEdge, eMaxPair, horzEdge->Top);
- DeleteFromAEL(horzEdge);
- DeleteFromAEL(eMaxPair);
- return;
- }
-
- IntPoint Pt = IntPoint(e->Curr.X, horzEdge->Curr.Y);
- if(dir == dLeftToRight)
- IntersectEdges(horzEdge, e, Pt);
- else
- IntersectEdges(e, horzEdge, Pt);
- TEdge* eNext = GetNextInAEL(e, dir);
- SwapPositionsInAEL( horzEdge, e );
- e = eNext;
- } //end while(e)
-
- //Break out of loop if HorzEdge.NextInLML is not also horizontal ...
- if (!horzEdge->NextInLML || !IsHorizontal(*horzEdge->NextInLML)) break;
-
- UpdateEdgeIntoAEL(horzEdge);
- if (horzEdge->OutIdx >= 0) AddOutPt(horzEdge, horzEdge->Bot);
- GetHorzDirection(*horzEdge, dir, horzLeft, horzRight);
-
- } //end for (;;)
-
- if (horzEdge->OutIdx >= 0 && !op1)
- {
- op1 = GetLastOutPt(horzEdge);
- TEdge* eNextHorz = m_SortedEdges;
- while (eNextHorz)
- {
- if (eNextHorz->OutIdx >= 0 &&
- HorzSegmentsOverlap(horzEdge->Bot.X,
- horzEdge->Top.X, eNextHorz->Bot.X, eNextHorz->Top.X))
- {
- OutPt* op2 = GetLastOutPt(eNextHorz);
- AddJoin(op2, op1, eNextHorz->Top);
- }
- eNextHorz = eNextHorz->NextInSEL;
- }
- AddGhostJoin(op1, horzEdge->Top);
- }
-
- if (horzEdge->NextInLML)
- {
- if(horzEdge->OutIdx >= 0)
- {
- op1 = AddOutPt( horzEdge, horzEdge->Top);
- UpdateEdgeIntoAEL(horzEdge);
- if (horzEdge->WindDelta == 0) return;
- //nb: HorzEdge is no longer horizontal here
- TEdge* ePrev = horzEdge->PrevInAEL;
- TEdge* eNext = horzEdge->NextInAEL;
- if (ePrev && ePrev->Curr.X == horzEdge->Bot.X &&
- ePrev->Curr.Y == horzEdge->Bot.Y && ePrev->WindDelta != 0 &&
- (ePrev->OutIdx >= 0 && ePrev->Curr.Y > ePrev->Top.Y &&
- SlopesEqual(*horzEdge, *ePrev, m_UseFullRange)))
- {
- OutPt* op2 = AddOutPt(ePrev, horzEdge->Bot);
- AddJoin(op1, op2, horzEdge->Top);
- }
- else if (eNext && eNext->Curr.X == horzEdge->Bot.X &&
- eNext->Curr.Y == horzEdge->Bot.Y && eNext->WindDelta != 0 &&
- eNext->OutIdx >= 0 && eNext->Curr.Y > eNext->Top.Y &&
- SlopesEqual(*horzEdge, *eNext, m_UseFullRange))
- {
- OutPt* op2 = AddOutPt(eNext, horzEdge->Bot);
- AddJoin(op1, op2, horzEdge->Top);
- }
- }
- else
- UpdateEdgeIntoAEL(horzEdge);
- }
- else
- {
- if (horzEdge->OutIdx >= 0) AddOutPt(horzEdge, horzEdge->Top);
- DeleteFromAEL(horzEdge);
- }
-}
-//------------------------------------------------------------------------------
-
-bool Clipper::ProcessIntersections(const cInt topY)
-{
- if( !m_ActiveEdges ) return true;
- try {
- BuildIntersectList(topY);
- size_t IlSize = m_IntersectList.size();
- if (IlSize == 0) return true;
- if (IlSize == 1 || FixupIntersectionOrder()) ProcessIntersectList();
- else return false;
- }
- catch(...)
- {
- m_SortedEdges = 0;
- DisposeIntersectNodes();
- throw clipperException("ProcessIntersections error");
- }
- m_SortedEdges = 0;
- return true;
-}
-//------------------------------------------------------------------------------
-
-void Clipper::DisposeIntersectNodes()
-{
- for (size_t i = 0; i < m_IntersectList.size(); ++i )
- delete m_IntersectList[i];
- m_IntersectList.clear();
-}
-//------------------------------------------------------------------------------
-
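-//BuildIntersectList finds all edge intersections within the current scanbeam:
-//the AEL is copied to the SEL with each Curr.X set to the edge's X at topY,
-//then a bubble sort restores X order; every swap of adjacent edges
-//corresponds to one intersection, which is recorded as an IntersectNode.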
-void Clipper::BuildIntersectList(const cInt topY)
-{
- if ( !m_ActiveEdges ) return;
-
- //prepare for sorting ...
- TEdge* e = m_ActiveEdges;
- m_SortedEdges = e;
- while( e )
- {
- e->PrevInSEL = e->PrevInAEL;
- e->NextInSEL = e->NextInAEL;
- e->Curr.X = TopX( *e, topY );
- e = e->NextInAEL;
- }
-
- //bubblesort ...
- bool isModified;
- do
- {
- isModified = false;
- e = m_SortedEdges;
- while( e->NextInSEL )
- {
- TEdge *eNext = e->NextInSEL;
- IntPoint Pt;
- if(e->Curr.X > eNext->Curr.X)
- {
- IntersectPoint(*e, *eNext, Pt);
- if (Pt.Y < topY) Pt = IntPoint(TopX(*e, topY), topY);
- IntersectNode * newNode = new IntersectNode;
- newNode->Edge1 = e;
- newNode->Edge2 = eNext;
- newNode->Pt = Pt;
- m_IntersectList.push_back(newNode);
-
- SwapPositionsInSEL(e, eNext);
- isModified = true;
- }
- else
- e = eNext;
- }
- if( e->PrevInSEL ) e->PrevInSEL->NextInSEL = 0;
- else break;
- }
- while ( isModified );
- m_SortedEdges = 0; //important
-}
-//------------------------------------------------------------------------------
-
-void Clipper::ProcessIntersectList()
-{
- for (size_t i = 0; i < m_IntersectList.size(); ++i)
- {
- IntersectNode* iNode = m_IntersectList[i];
- {
- IntersectEdges( iNode->Edge1, iNode->Edge2, iNode->Pt);
- SwapPositionsInAEL( iNode->Edge1 , iNode->Edge2 );
- }
- delete iNode;
- }
- m_IntersectList.clear();
-}
-//------------------------------------------------------------------------------
-
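-//sort comparator: intersections with the larger Y (ie bottom-most) come first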
-bool IntersectListSort(IntersectNode* node1, IntersectNode* node2)
-{
- return node2->Pt.Y < node1->Pt.Y;
-}
-//------------------------------------------------------------------------------
-
-inline bool EdgesAdjacent(const IntersectNode &inode)
-{
- return (inode.Edge1->NextInSEL == inode.Edge2) ||
- (inode.Edge1->PrevInSEL == inode.Edge2);
-}
-//------------------------------------------------------------------------------
-
-bool Clipper::FixupIntersectionOrder()
-{
- //pre-condition: intersections are sorted Bottom-most first.
- //Now it's crucial that intersections are made only between adjacent edges,
- //so, to ensure this, the order of intersections may need adjusting ...
- CopyAELToSEL();
- std::sort(m_IntersectList.begin(), m_IntersectList.end(), IntersectListSort);
- size_t cnt = m_IntersectList.size();
- for (size_t i = 0; i < cnt; ++i)
- {
- if (!EdgesAdjacent(*m_IntersectList[i]))
- {
- size_t j = i + 1;
- while (j < cnt && !EdgesAdjacent(*m_IntersectList[j])) j++;
- if (j == cnt) return false;
- std::swap(m_IntersectList[i], m_IntersectList[j]);
- }
- SwapPositionsInSEL(m_IntersectList[i]->Edge1, m_IntersectList[i]->Edge2);
- }
- return true;
-}
-//------------------------------------------------------------------------------
-
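-//DoMaxima handles an edge that has reached its top vertex (a maxima): the
-//edge is intersected with every AEL edge between it and its maxima pair, a
-//local maximum output point is emitted where contributing, and both edges
-//are then removed from the AEL.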
-void Clipper::DoMaxima(TEdge *e)
-{
- TEdge* eMaxPair = GetMaximaPairEx(e);
- if (!eMaxPair)
- {
- if (e->OutIdx >= 0)
- AddOutPt(e, e->Top);
- DeleteFromAEL(e);
- return;
- }
-
- TEdge* eNext = e->NextInAEL;
- while(eNext && eNext != eMaxPair)
- {
- IntersectEdges(e, eNext, e->Top);
- SwapPositionsInAEL(e, eNext);
- eNext = e->NextInAEL;
- }
-
- if(e->OutIdx == Unassigned && eMaxPair->OutIdx == Unassigned)
- {
- DeleteFromAEL(e);
- DeleteFromAEL(eMaxPair);
- }
- else if( e->OutIdx >= 0 && eMaxPair->OutIdx >= 0 )
- {
- if (e->OutIdx >= 0) AddLocalMaxPoly(e, eMaxPair, e->Top);
- DeleteFromAEL(e);
- DeleteFromAEL(eMaxPair);
- }
-#ifdef use_lines
- else if (e->WindDelta == 0)
- {
- if (e->OutIdx >= 0)
- {
- AddOutPt(e, e->Top);
- e->OutIdx = Unassigned;
- }
- DeleteFromAEL(e);
-
- if (eMaxPair->OutIdx >= 0)
- {
- AddOutPt(eMaxPair, e->Top);
- eMaxPair->OutIdx = Unassigned;
- }
- DeleteFromAEL(eMaxPair);
- }
-#endif
- else throw clipperException("DoMaxima error");
-}
-//------------------------------------------------------------------------------
-
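-//ProcessEdgesAtTopOfScanbeam advances the sweep to topY in the four numbered
-//steps below: maxima are closed off, horizontals are promoted into the SEL,
-//the queued horizontals are processed, and intermediate edges are replaced
-//by their NextInLML successors.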
-void Clipper::ProcessEdgesAtTopOfScanbeam(const cInt topY)
-{
- TEdge* e = m_ActiveEdges;
- while( e )
- {
- //1. process maxima, treating them as if they're 'bent' horizontal edges,
- // but exclude maxima with horizontal edges. nb: e can't be a horizontal.
- bool IsMaximaEdge = IsMaxima(e, topY);
-
- if(IsMaximaEdge)
- {
- TEdge* eMaxPair = GetMaximaPairEx(e);
- IsMaximaEdge = (!eMaxPair || !IsHorizontal(*eMaxPair));
- }
-
- if(IsMaximaEdge)
- {
- if (m_StrictSimple) m_Maxima.push_back(e->Top.X);
- TEdge* ePrev = e->PrevInAEL;
- DoMaxima(e);
- if( !ePrev ) e = m_ActiveEdges;
- else e = ePrev->NextInAEL;
- }
- else
- {
- //2. promote horizontal edges, otherwise update Curr.X and Curr.Y ...
- if (IsIntermediate(e, topY) && IsHorizontal(*e->NextInLML))
- {
- UpdateEdgeIntoAEL(e);
- if (e->OutIdx >= 0)
- AddOutPt(e, e->Bot);
- AddEdgeToSEL(e);
- }
- else
- {
- e->Curr.X = TopX( *e, topY );
- e->Curr.Y = topY;
- }
-
- //When StrictlySimple is set and 'e' is being touched by another edge,
- //make sure both edges have a vertex here ...
- if (m_StrictSimple)
- {
- TEdge* ePrev = e->PrevInAEL;
- if ((e->OutIdx >= 0) && (e->WindDelta != 0) && ePrev && (ePrev->OutIdx >= 0) &&
- (ePrev->Curr.X == e->Curr.X) && (ePrev->WindDelta != 0))
- {
- IntPoint pt = e->Curr;
-#ifdef use_xyz
- SetZ(pt, *ePrev, *e);
-#endif
- OutPt* op = AddOutPt(ePrev, pt);
- OutPt* op2 = AddOutPt(e, pt);
- AddJoin(op, op2, pt); //StrictlySimple (type-3) join
- }
- }
-
- e = e->NextInAEL;
- }
- }
-
- //3. Process horizontals at the Top of the scanbeam ...
- m_Maxima.sort();
- ProcessHorizontals();
- m_Maxima.clear();
-
- //4. Promote intermediate vertices ...
- e = m_ActiveEdges;
- while(e)
- {
- if(IsIntermediate(e, topY))
- {
- OutPt* op = 0;
- if( e->OutIdx >= 0 )
- op = AddOutPt(e, e->Top);
- UpdateEdgeIntoAEL(e);
-
- //if output polygons share an edge, they'll need joining later ...
- TEdge* ePrev = e->PrevInAEL;
- TEdge* eNext = e->NextInAEL;
- if (ePrev && ePrev->Curr.X == e->Bot.X &&
- ePrev->Curr.Y == e->Bot.Y && op &&
- ePrev->OutIdx >= 0 && ePrev->Curr.Y > ePrev->Top.Y &&
- SlopesEqual(e->Curr, e->Top, ePrev->Curr, ePrev->Top, m_UseFullRange) &&
- (e->WindDelta != 0) && (ePrev->WindDelta != 0))
- {
- OutPt* op2 = AddOutPt(ePrev, e->Bot);
- AddJoin(op, op2, e->Top);
- }
- else if (eNext && eNext->Curr.X == e->Bot.X &&
- eNext->Curr.Y == e->Bot.Y && op &&
- eNext->OutIdx >= 0 && eNext->Curr.Y > eNext->Top.Y &&
- SlopesEqual(e->Curr, e->Top, eNext->Curr, eNext->Top, m_UseFullRange) &&
- (e->WindDelta != 0) && (eNext->WindDelta != 0))
- {
- OutPt* op2 = AddOutPt(eNext, e->Bot);
- AddJoin(op, op2, e->Top);
- }
- }
- e = e->NextInAEL;
- }
-}
-//------------------------------------------------------------------------------
-
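-//FixupOutPolyline strips duplicate points from an open path result and
-//disposes of the path entirely when fewer than two distinct points remain.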
-void Clipper::FixupOutPolyline(OutRec &outrec)
-{
- OutPt *pp = outrec.Pts;
- OutPt *lastPP = pp->Prev;
- while (pp != lastPP)
- {
- pp = pp->Next;
- if (pp->Pt == pp->Prev->Pt)
- {
- if (pp == lastPP) lastPP = pp->Prev;
- OutPt *tmpPP = pp->Prev;
- tmpPP->Next = pp->Next;
- pp->Next->Prev = tmpPP;
- delete pp;
- pp = tmpPP;
- }
- }
-
- if (pp == pp->Prev)
- {
- DisposeOutPts(pp);
- outrec.Pts = 0;
- return;
- }
-}
-//------------------------------------------------------------------------------
-
-void Clipper::FixupOutPolygon(OutRec &outrec)
-{
- //FixupOutPolygon() - removes duplicate points and simplifies consecutive
- //parallel edges by removing the middle vertex.
- OutPt *lastOK = 0;
- outrec.BottomPt = 0;
- OutPt *pp = outrec.Pts;
- bool preserveCol = m_PreserveCollinear || m_StrictSimple;
-
- for (;;)
- {
- if (pp->Prev == pp || pp->Prev == pp->Next)
- {
- DisposeOutPts(pp);
- outrec.Pts = 0;
- return;
- }
-
- //test for duplicate points and collinear edges ...
- if ((pp->Pt == pp->Next->Pt) || (pp->Pt == pp->Prev->Pt) ||
- (SlopesEqual(pp->Prev->Pt, pp->Pt, pp->Next->Pt, m_UseFullRange) &&
- (!preserveCol || !Pt2IsBetweenPt1AndPt3(pp->Prev->Pt, pp->Pt, pp->Next->Pt))))
- {
- lastOK = 0;
- OutPt *tmp = pp;
- pp->Prev->Next = pp->Next;
- pp->Next->Prev = pp->Prev;
- pp = pp->Prev;
- delete tmp;
- }
- else if (pp == lastOK) break;
- else
- {
- if (!lastOK) lastOK = pp;
- pp = pp->Next;
- }
- }
- outrec.Pts = pp;
-}
-//------------------------------------------------------------------------------
-
-int PointCount(OutPt *Pts)
-{
- if (!Pts) return 0;
- int result = 0;
- OutPt* p = Pts;
- do
- {
- result++;
- p = p->Next;
- }
- while (p != Pts);
- return result;
-}
-//------------------------------------------------------------------------------
-
-void Clipper::BuildResult(Paths &polys)
-{
- polys.reserve(m_PolyOuts.size());
- for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i)
- {
- if (!m_PolyOuts[i]->Pts) continue;
- Path pg;
- OutPt* p = m_PolyOuts[i]->Pts->Prev;
- int cnt = PointCount(p);
- if (cnt < 2) continue;
- pg.reserve(cnt);
- for (int j = 0; j < cnt; ++j)
- {
- pg.push_back(p->Pt);
- p = p->Prev;
- }
- polys.push_back(pg);
- }
-}
-//------------------------------------------------------------------------------
-
-void Clipper::BuildResult2(PolyTree& polytree)
-{
- polytree.Clear();
- polytree.AllNodes.reserve(m_PolyOuts.size());
- //add each output polygon/contour to polytree ...
- for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); i++)
- {
- OutRec* outRec = m_PolyOuts[i];
- int cnt = PointCount(outRec->Pts);
- if ((outRec->IsOpen && cnt < 2) || (!outRec->IsOpen && cnt < 3)) continue;
- FixHoleLinkage(*outRec);
- PolyNode* pn = new PolyNode();
- //nb: polytree takes ownership of all the PolyNodes
- polytree.AllNodes.push_back(pn);
- outRec->PolyNd = pn;
- pn->Parent = 0;
- pn->Index = 0;
- pn->Contour.reserve(cnt);
- OutPt *op = outRec->Pts->Prev;
- for (int j = 0; j < cnt; j++)
- {
- pn->Contour.push_back(op->Pt);
- op = op->Prev;
- }
- }
-
- //fixup PolyNode links etc ...
- polytree.Childs.reserve(m_PolyOuts.size());
- for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); i++)
- {
- OutRec* outRec = m_PolyOuts[i];
- if (!outRec->PolyNd) continue;
- if (outRec->IsOpen)
- {
- outRec->PolyNd->m_IsOpen = true;
- polytree.AddChild(*outRec->PolyNd);
- }
- else if (outRec->FirstLeft && outRec->FirstLeft->PolyNd)
- outRec->FirstLeft->PolyNd->AddChild(*outRec->PolyNd);
- else
- polytree.AddChild(*outRec->PolyNd);
- }
-}
-//------------------------------------------------------------------------------
-
-void SwapIntersectNodes(IntersectNode &int1, IntersectNode &int2)
-{
- //just swap the contents (because fIntersectNodes is a single-linked-list)
- IntersectNode inode = int1; //gets a copy of Int1
- int1.Edge1 = int2.Edge1;
- int1.Edge2 = int2.Edge2;
- int1.Pt = int2.Pt;
- int2.Edge1 = inode.Edge1;
- int2.Edge2 = inode.Edge2;
- int2.Pt = inode.Pt;
-}
-//------------------------------------------------------------------------------
-
-inline bool E2InsertsBeforeE1(TEdge &e1, TEdge &e2)
-{
- if (e2.Curr.X == e1.Curr.X)
- {
- if (e2.Top.Y > e1.Top.Y)
- return e2.Top.X < TopX(e1, e2.Top.Y);
- else return e1.Top.X > TopX(e2, e1.Top.Y);
- }
- else return e2.Curr.X < e1.Curr.X;
-}
-//------------------------------------------------------------------------------
-
-bool GetOverlap(const cInt a1, const cInt a2, const cInt b1, const cInt b2,
- cInt& Left, cInt& Right)
-{
- if (a1 < a2)
- {
- if (b1 < b2) {Left = std::max(a1,b1); Right = std::min(a2,b2);}
- else {Left = std::max(a1,b2); Right = std::min(a2,b1);}
- }
- else
- {
- if (b1 < b2) {Left = std::max(a2,b1); Right = std::min(a1,b2);}
- else {Left = std::max(a2,b2); Right = std::min(a1,b1);}
- }
- return Left < Right;
-}
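-//nb (illustrative note, not in the original source): e.g.
-//GetOverlap(10,20, 15,30, L,R) sets L = 15, R = 20 and returns true, while
-//GetOverlap(10,20, 25,30, L,R) sets L = 25, R = 20 and returns false.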
-//------------------------------------------------------------------------------
-
-inline void UpdateOutPtIdxs(OutRec& outrec)
-{
- OutPt* op = outrec.Pts;
- do
- {
- op->Idx = outrec.Idx;
- op = op->Prev;
- }
- while(op != outrec.Pts);
-}
-//------------------------------------------------------------------------------
-
-void Clipper::InsertEdgeIntoAEL(TEdge *edge, TEdge* startEdge)
-{
- if(!m_ActiveEdges)
- {
- edge->PrevInAEL = 0;
- edge->NextInAEL = 0;
- m_ActiveEdges = edge;
- }
- else if(!startEdge && E2InsertsBeforeE1(*m_ActiveEdges, *edge))
- {
- edge->PrevInAEL = 0;
- edge->NextInAEL = m_ActiveEdges;
- m_ActiveEdges->PrevInAEL = edge;
- m_ActiveEdges = edge;
- }
- else
- {
- if(!startEdge) startEdge = m_ActiveEdges;
- while(startEdge->NextInAEL &&
- !E2InsertsBeforeE1(*startEdge->NextInAEL , *edge))
- startEdge = startEdge->NextInAEL;
- edge->NextInAEL = startEdge->NextInAEL;
- if(startEdge->NextInAEL) startEdge->NextInAEL->PrevInAEL = edge;
- edge->PrevInAEL = startEdge;
- startEdge->NextInAEL = edge;
- }
-}
-//----------------------------------------------------------------------
-
-OutPt* DupOutPt(OutPt* outPt, bool InsertAfter)
-{
- OutPt* result = new OutPt;
- result->Pt = outPt->Pt;
- result->Idx = outPt->Idx;
- if (InsertAfter)
- {
- result->Next = outPt->Next;
- result->Prev = outPt;
- outPt->Next->Prev = result;
- outPt->Next = result;
- }
- else
- {
- result->Prev = outPt->Prev;
- result->Next = outPt;
- outPt->Prev->Next = result;
- outPt->Prev = result;
- }
- return result;
-}
-//------------------------------------------------------------------------------
-
-bool JoinHorz(OutPt* op1, OutPt* op1b, OutPt* op2, OutPt* op2b,
- const IntPoint Pt, bool DiscardLeft)
-{
- Direction Dir1 = (op1->Pt.X > op1b->Pt.X ? dRightToLeft : dLeftToRight);
- Direction Dir2 = (op2->Pt.X > op2b->Pt.X ? dRightToLeft : dLeftToRight);
- if (Dir1 == Dir2) return false;
-
- //When DiscardLeft, we want Op1b to be on the Left of Op1, otherwise we
- //want Op1b to be on the Right. (And likewise with Op2 and Op2b.)
- //So, to facilitate this while inserting Op1b and Op2b ...
- //when DiscardLeft, make sure we're AT or RIGHT of Pt before adding Op1b,
- //otherwise make sure we're AT or LEFT of Pt. (Likewise with Op2b.)
- if (Dir1 == dLeftToRight)
- {
- while (op1->Next->Pt.X <= Pt.X &&
- op1->Next->Pt.X >= op1->Pt.X && op1->Next->Pt.Y == Pt.Y)
- op1 = op1->Next;
- if (DiscardLeft && (op1->Pt.X != Pt.X)) op1 = op1->Next;
- op1b = DupOutPt(op1, !DiscardLeft);
- if (op1b->Pt != Pt)
- {
- op1 = op1b;
- op1->Pt = Pt;
- op1b = DupOutPt(op1, !DiscardLeft);
- }
- }
- else
- {
- while (op1->Next->Pt.X >= Pt.X &&
- op1->Next->Pt.X <= op1->Pt.X && op1->Next->Pt.Y == Pt.Y)
- op1 = op1->Next;
- if (!DiscardLeft && (op1->Pt.X != Pt.X)) op1 = op1->Next;
- op1b = DupOutPt(op1, DiscardLeft);
- if (op1b->Pt != Pt)
- {
- op1 = op1b;
- op1->Pt = Pt;
- op1b = DupOutPt(op1, DiscardLeft);
- }
- }
-
- if (Dir2 == dLeftToRight)
- {
- while (op2->Next->Pt.X <= Pt.X &&
- op2->Next->Pt.X >= op2->Pt.X && op2->Next->Pt.Y == Pt.Y)
- op2 = op2->Next;
- if (DiscardLeft && (op2->Pt.X != Pt.X)) op2 = op2->Next;
- op2b = DupOutPt(op2, !DiscardLeft);
- if (op2b->Pt != Pt)
- {
- op2 = op2b;
- op2->Pt = Pt;
- op2b = DupOutPt(op2, !DiscardLeft);
- };
- } else
- {
- while (op2->Next->Pt.X >= Pt.X &&
- op2->Next->Pt.X <= op2->Pt.X && op2->Next->Pt.Y == Pt.Y)
- op2 = op2->Next;
- if (!DiscardLeft && (op2->Pt.X != Pt.X)) op2 = op2->Next;
- op2b = DupOutPt(op2, DiscardLeft);
- if (op2b->Pt != Pt)
- {
- op2 = op2b;
- op2->Pt = Pt;
- op2b = DupOutPt(op2, DiscardLeft);
- };
- };
-
- if ((Dir1 == dLeftToRight) == DiscardLeft)
- {
- op1->Prev = op2;
- op2->Next = op1;
- op1b->Next = op2b;
- op2b->Prev = op1b;
- }
- else
- {
- op1->Next = op2;
- op2->Prev = op1;
- op1b->Prev = op2b;
- op2b->Next = op1b;
- }
- return true;
-}
-//------------------------------------------------------------------------------
-
-bool Clipper::JoinPoints(Join *j, OutRec* outRec1, OutRec* outRec2)
-{
- OutPt *op1 = j->OutPt1, *op1b;
- OutPt *op2 = j->OutPt2, *op2b;
-
- //There are 3 kinds of joins for output polygons ...
- //1. Horizontal joins where Join.OutPt1 & Join.OutPt2 are vertices anywhere
- //along (horizontal) collinear edges (& Join.OffPt is on the same horizontal).
- //2. Non-horizontal joins where Join.OutPt1 & Join.OutPt2 are at the same
- //location at the Bottom of the overlapping segment (& Join.OffPt is above).
- //3. StrictSimple joins where edges touch but are not collinear and where
- //Join.OutPt1, Join.OutPt2 & Join.OffPt all share the same point.
- bool isHorizontal = (j->OutPt1->Pt.Y == j->OffPt.Y);
-
- if (isHorizontal && (j->OffPt == j->OutPt1->Pt) &&
- (j->OffPt == j->OutPt2->Pt))
- {
- //Strictly Simple join ...
- if (outRec1 != outRec2) return false;
- op1b = j->OutPt1->Next;
- while (op1b != op1 && (op1b->Pt == j->OffPt))
- op1b = op1b->Next;
- bool reverse1 = (op1b->Pt.Y > j->OffPt.Y);
- op2b = j->OutPt2->Next;
- while (op2b != op2 && (op2b->Pt == j->OffPt))
- op2b = op2b->Next;
- bool reverse2 = (op2b->Pt.Y > j->OffPt.Y);
- if (reverse1 == reverse2) return false;
- if (reverse1)
- {
- op1b = DupOutPt(op1, false);
- op2b = DupOutPt(op2, true);
- op1->Prev = op2;
- op2->Next = op1;
- op1b->Next = op2b;
- op2b->Prev = op1b;
- j->OutPt1 = op1;
- j->OutPt2 = op1b;
- return true;
- } else
- {
- op1b = DupOutPt(op1, true);
- op2b = DupOutPt(op2, false);
- op1->Next = op2;
- op2->Prev = op1;
- op1b->Prev = op2b;
- op2b->Next = op1b;
- j->OutPt1 = op1;
- j->OutPt2 = op1b;
- return true;
- }
- }
- else if (isHorizontal)
- {
- //treat horizontal joins differently to non-horizontal joins since with
- //them we're not yet sure where the overlap is. OutPt1.Pt & OutPt2.Pt
- //may be anywhere along the horizontal edge.
- op1b = op1;
- while (op1->Prev->Pt.Y == op1->Pt.Y && op1->Prev != op1b && op1->Prev != op2)
- op1 = op1->Prev;
- while (op1b->Next->Pt.Y == op1b->Pt.Y && op1b->Next != op1 && op1b->Next != op2)
- op1b = op1b->Next;
- if (op1b->Next == op1 || op1b->Next == op2) return false; //a flat 'polygon'
-
- op2b = op2;
- while (op2->Prev->Pt.Y == op2->Pt.Y && op2->Prev != op2b && op2->Prev != op1b)
- op2 = op2->Prev;
- while (op2b->Next->Pt.Y == op2b->Pt.Y && op2b->Next != op2 && op2b->Next != op1)
- op2b = op2b->Next;
- if (op2b->Next == op2 || op2b->Next == op1) return false; //a flat 'polygon'
-
- cInt Left, Right;
- //Op1 --> Op1b & Op2 --> Op2b are the extremities of the horizontal edges
- if (!GetOverlap(op1->Pt.X, op1b->Pt.X, op2->Pt.X, op2b->Pt.X, Left, Right))
- return false;
-
- //DiscardLeftSide: when overlapping edges are joined, a spike will be created
- //which needs to be cleaned up. However, we don't want Op1 or Op2 caught up
- //on the discarded side as either may still be needed for other joins ...
- IntPoint Pt;
- bool DiscardLeftSide;
- if (op1->Pt.X >= Left && op1->Pt.X <= Right)
- {
- Pt = op1->Pt; DiscardLeftSide = (op1->Pt.X > op1b->Pt.X);
- }
- else if (op2->Pt.X >= Left && op2->Pt.X <= Right)
- {
- Pt = op2->Pt; DiscardLeftSide = (op2->Pt.X > op2b->Pt.X);
- }
- else if (op1b->Pt.X >= Left && op1b->Pt.X <= Right)
- {
- Pt = op1b->Pt; DiscardLeftSide = op1b->Pt.X > op1->Pt.X;
- }
- else
- {
- Pt = op2b->Pt; DiscardLeftSide = (op2b->Pt.X > op2->Pt.X);
- }
- j->OutPt1 = op1; j->OutPt2 = op2;
- return JoinHorz(op1, op1b, op2, op2b, Pt, DiscardLeftSide);
- } else
- {
- //nb: For non-horizontal joins ...
- // 1. Jr.OutPt1.Pt.Y == Jr.OutPt2.Pt.Y
- // 2. Jr.OutPt1.Pt.Y > Jr.OffPt.Y
-
- //make sure the polygons are correctly oriented ...
- op1b = op1->Next;
- while ((op1b->Pt == op1->Pt) && (op1b != op1)) op1b = op1b->Next;
- bool Reverse1 = ((op1b->Pt.Y > op1->Pt.Y) ||
- !SlopesEqual(op1->Pt, op1b->Pt, j->OffPt, m_UseFullRange));
- if (Reverse1)
- {
- op1b = op1->Prev;
- while ((op1b->Pt == op1->Pt) && (op1b != op1)) op1b = op1b->Prev;
- if ((op1b->Pt.Y > op1->Pt.Y) ||
- !SlopesEqual(op1->Pt, op1b->Pt, j->OffPt, m_UseFullRange)) return false;
- };
- op2b = op2->Next;
- while ((op2b->Pt == op2->Pt) && (op2b != op2))op2b = op2b->Next;
- bool Reverse2 = ((op2b->Pt.Y > op2->Pt.Y) ||
- !SlopesEqual(op2->Pt, op2b->Pt, j->OffPt, m_UseFullRange));
- if (Reverse2)
- {
- op2b = op2->Prev;
- while ((op2b->Pt == op2->Pt) && (op2b != op2)) op2b = op2b->Prev;
- if ((op2b->Pt.Y > op2->Pt.Y) ||
- !SlopesEqual(op2->Pt, op2b->Pt, j->OffPt, m_UseFullRange)) return false;
- }
-
- if ((op1b == op1) || (op2b == op2) || (op1b == op2b) ||
- ((outRec1 == outRec2) && (Reverse1 == Reverse2))) return false;
-
- if (Reverse1)
- {
- op1b = DupOutPt(op1, false);
- op2b = DupOutPt(op2, true);
- op1->Prev = op2;
- op2->Next = op1;
- op1b->Next = op2b;
- op2b->Prev = op1b;
- j->OutPt1 = op1;
- j->OutPt2 = op1b;
- return true;
- } else
- {
- op1b = DupOutPt(op1, true);
- op2b = DupOutPt(op2, false);
- op1->Next = op2;
- op2->Prev = op1;
- op1b->Prev = op2b;
- op2b->Next = op1b;
- j->OutPt1 = op1;
- j->OutPt2 = op1b;
- return true;
- }
- }
-}
-//----------------------------------------------------------------------
-
-static OutRec* ParseFirstLeft(OutRec* FirstLeft)
-{
- while (FirstLeft && !FirstLeft->Pts)
- FirstLeft = FirstLeft->FirstLeft;
- return FirstLeft;
-}
-//------------------------------------------------------------------------------
-
-void Clipper::FixupFirstLefts1(OutRec* OldOutRec, OutRec* NewOutRec)
-{
- //tests if NewOutRec contains the polygon before reassigning FirstLeft
- for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i)
- {
- OutRec* outRec = m_PolyOuts[i];
- OutRec* firstLeft = ParseFirstLeft(outRec->FirstLeft);
- if (outRec->Pts && firstLeft == OldOutRec)
- {
- if (Poly2ContainsPoly1(outRec->Pts, NewOutRec->Pts))
- outRec->FirstLeft = NewOutRec;
- }
- }
-}
-//----------------------------------------------------------------------
-
-void Clipper::FixupFirstLefts2(OutRec* InnerOutRec, OutRec* OuterOutRec)
-{
- //A polygon has split into two such that one is now the inner of the other.
- //It's possible that these polygons now wrap around other polygons, so check
- //every polygon that's also contained by OuterOutRec's FirstLeft container
- //(including 0) to see if they've become inner to the new inner polygon ...
- OutRec* orfl = OuterOutRec->FirstLeft;
- for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i)
- {
- OutRec* outRec = m_PolyOuts[i];
-
- if (!outRec->Pts || outRec == OuterOutRec || outRec == InnerOutRec)
- continue;
- OutRec* firstLeft = ParseFirstLeft(outRec->FirstLeft);
- if (firstLeft != orfl && firstLeft != InnerOutRec && firstLeft != OuterOutRec)
- continue;
- if (Poly2ContainsPoly1(outRec->Pts, InnerOutRec->Pts))
- outRec->FirstLeft = InnerOutRec;
- else if (Poly2ContainsPoly1(outRec->Pts, OuterOutRec->Pts))
- outRec->FirstLeft = OuterOutRec;
- else if (outRec->FirstLeft == InnerOutRec || outRec->FirstLeft == OuterOutRec)
- outRec->FirstLeft = orfl;
- }
-}
-//----------------------------------------------------------------------
-void Clipper::FixupFirstLefts3(OutRec* OldOutRec, OutRec* NewOutRec)
-{
- //reassigns FirstLeft WITHOUT testing if NewOutRec contains the polygon
- for (PolyOutList::size_type i = 0; i < m_PolyOuts.size(); ++i)
- {
- OutRec* outRec = m_PolyOuts[i];
- OutRec* firstLeft = ParseFirstLeft(outRec->FirstLeft);
- if (outRec->Pts && firstLeft == OldOutRec)
- outRec->FirstLeft = NewOutRec;
- }
-}
-//----------------------------------------------------------------------
-
-void Clipper::JoinCommonEdges()
-{
- for (JoinList::size_type i = 0; i < m_Joins.size(); i++)
- {
- Join* join = m_Joins[i];
-
- OutRec *outRec1 = GetOutRec(join->OutPt1->Idx);
- OutRec *outRec2 = GetOutRec(join->OutPt2->Idx);
-
- if (!outRec1->Pts || !outRec2->Pts) continue;
- if (outRec1->IsOpen || outRec2->IsOpen) continue;
-
- //get the polygon fragment with the correct hole state (FirstLeft)
- //before calling JoinPoints() ...
- OutRec *holeStateRec;
- if (outRec1 == outRec2) holeStateRec = outRec1;
- else if (OutRec1RightOfOutRec2(outRec1, outRec2)) holeStateRec = outRec2;
- else if (OutRec1RightOfOutRec2(outRec2, outRec1)) holeStateRec = outRec1;
- else holeStateRec = GetLowermostRec(outRec1, outRec2);
-
- if (!JoinPoints(join, outRec1, outRec2)) continue;
-
- if (outRec1 == outRec2)
- {
- //instead of joining two polygons, we've just created a new one by
- //splitting one polygon into two.
- outRec1->Pts = join->OutPt1;
- outRec1->BottomPt = 0;
- outRec2 = CreateOutRec();
- outRec2->Pts = join->OutPt2;
-
- //update all OutRec2.Pts Idx's ...
- UpdateOutPtIdxs(*outRec2);
-
- if (Poly2ContainsPoly1(outRec2->Pts, outRec1->Pts))
- {
- //outRec1 contains outRec2 ...
- outRec2->IsHole = !outRec1->IsHole;
- outRec2->FirstLeft = outRec1;
-
- if (m_UsingPolyTree) FixupFirstLefts2(outRec2, outRec1);
-
- if ((outRec2->IsHole ^ m_ReverseOutput) == (Area(*outRec2) > 0))
- ReversePolyPtLinks(outRec2->Pts);
-
- } else if (Poly2ContainsPoly1(outRec1->Pts, outRec2->Pts))
- {
- //outRec2 contains outRec1 ...
- outRec2->IsHole = outRec1->IsHole;
- outRec1->IsHole = !outRec2->IsHole;
- outRec2->FirstLeft = outRec1->FirstLeft;
- outRec1->FirstLeft = outRec2;
-
- if (m_UsingPolyTree) FixupFirstLefts2(outRec1, outRec2);
-
- if ((outRec1->IsHole ^ m_ReverseOutput) == (Area(*outRec1) > 0))
- ReversePolyPtLinks(outRec1->Pts);
- }
- else
- {
- //the 2 polygons are completely separate ...
- outRec2->IsHole = outRec1->IsHole;
- outRec2->FirstLeft = outRec1->FirstLeft;
-
- //fixup FirstLeft pointers that may need reassigning to OutRec2
- if (m_UsingPolyTree) FixupFirstLefts1(outRec1, outRec2);
- }
-
- } else
- {
- //joined 2 polygons together ...
-
- outRec2->Pts = 0;
- outRec2->BottomPt = 0;
- outRec2->Idx = outRec1->Idx;
-
- outRec1->IsHole = holeStateRec->IsHole;
- if (holeStateRec == outRec2)
- outRec1->FirstLeft = outRec2->FirstLeft;
- outRec2->FirstLeft = outRec1;
-
- if (m_UsingPolyTree) FixupFirstLefts3(outRec2, outRec1);
- }
- }
-}
-
-//------------------------------------------------------------------------------
-// ClipperOffset support functions ...
-//------------------------------------------------------------------------------
-
-DoublePoint GetUnitNormal(const IntPoint &pt1, const IntPoint &pt2)
-{
- if(pt2.X == pt1.X && pt2.Y == pt1.Y)
- return DoublePoint(0, 0);
-
- double Dx = (double)(pt2.X - pt1.X);
- double dy = (double)(pt2.Y - pt1.Y);
- double f = 1.0 / std::sqrt(Dx*Dx + dy*dy);
- Dx *= f;
- dy *= f;
- return DoublePoint(dy, -Dx);
-}
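-//nb (illustrative note, not in the original source): for the edge
-//(0,0) -> (10,0) this returns (0,-1), i.e. the normalised edge vector
-//rotated to (dy,-dx); offsetting displaces each vertex along these normals.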
-
-//------------------------------------------------------------------------------
-// ClipperOffset class
-//------------------------------------------------------------------------------
-
-ClipperOffset::ClipperOffset(double miterLimit, double arcTolerance)
-{
- this->MiterLimit = miterLimit;
- this->ArcTolerance = arcTolerance;
- m_lowest.X = -1;
-}
-//------------------------------------------------------------------------------
-
-ClipperOffset::~ClipperOffset()
-{
- Clear();
-}
-//------------------------------------------------------------------------------
-
-void ClipperOffset::Clear()
-{
- for (int i = 0; i < m_polyNodes.ChildCount(); ++i)
- delete m_polyNodes.Childs[i];
- m_polyNodes.Childs.clear();
- m_lowest.X = -1;
-}
-//------------------------------------------------------------------------------
-
-void ClipperOffset::AddPath(const Path& path, JoinType joinType, EndType endType)
-{
- int highI = (int)path.size() - 1;
- if (highI < 0) return;
- PolyNode* newNode = new PolyNode();
- newNode->m_jointype = joinType;
- newNode->m_endtype = endType;
-
- //strip duplicate points from path and also get index to the lowest point ...
- if (endType == etClosedLine || endType == etClosedPolygon)
- while (highI > 0 && path[0] == path[highI]) highI--;
- newNode->Contour.reserve(highI + 1);
- newNode->Contour.push_back(path[0]);
- int j = 0, k = 0;
- for (int i = 1; i <= highI; i++)
- if (newNode->Contour[j] != path[i])
- {
- j++;
- newNode->Contour.push_back(path[i]);
- if (path[i].Y > newNode->Contour[k].Y ||
- (path[i].Y == newNode->Contour[k].Y &&
- path[i].X < newNode->Contour[k].X)) k = j;
- }
- if (endType == etClosedPolygon && j < 2)
- {
- delete newNode;
- return;
- }
- m_polyNodes.AddChild(*newNode);
-
- //if this path's lowest pt is lower than all the others then update m_lowest
- if (endType != etClosedPolygon) return;
- if (m_lowest.X < 0)
- m_lowest = IntPoint(m_polyNodes.ChildCount() - 1, k);
- else
- {
- IntPoint ip = m_polyNodes.Childs[(int)m_lowest.X]->Contour[(int)m_lowest.Y];
- if (newNode->Contour[k].Y > ip.Y ||
- (newNode->Contour[k].Y == ip.Y &&
- newNode->Contour[k].X < ip.X))
- m_lowest = IntPoint(m_polyNodes.ChildCount() - 1, k);
- }
-}
-//------------------------------------------------------------------------------
-
-void ClipperOffset::AddPaths(const Paths& paths, JoinType joinType, EndType endType)
-{
- for (Paths::size_type i = 0; i < paths.size(); ++i)
- AddPath(paths[i], joinType, endType);
-}
-//------------------------------------------------------------------------------
-
-void ClipperOffset::FixOrientations()
-{
- //fixup orientations of all closed paths if the orientation of the
- //closed path with the lowermost vertex is wrong ...
- if (m_lowest.X >= 0 &&
- !Orientation(m_polyNodes.Childs[(int)m_lowest.X]->Contour))
- {
- for (int i = 0; i < m_polyNodes.ChildCount(); ++i)
- {
- PolyNode& node = *m_polyNodes.Childs[i];
- if (node.m_endtype == etClosedPolygon ||
- (node.m_endtype == etClosedLine && Orientation(node.Contour)))
- ReversePath(node.Contour);
- }
- } else
- {
- for (int i = 0; i < m_polyNodes.ChildCount(); ++i)
- {
- PolyNode& node = *m_polyNodes.Childs[i];
- if (node.m_endtype == etClosedLine && !Orientation(node.Contour))
- ReversePath(node.Contour);
- }
- }
-}
-//------------------------------------------------------------------------------
-
-void ClipperOffset::Execute(Paths& solution, double delta)
-{
- solution.clear();
- FixOrientations();
- DoOffset(delta);
-
- //now clean up 'corners' ...
- Clipper clpr;
- clpr.AddPaths(m_destPolys, ptSubject, true);
- if (delta > 0)
- {
- clpr.Execute(ctUnion, solution, pftPositive, pftPositive);
- }
- else
- {
- IntRect r = clpr.GetBounds();
- Path outer(4);
- outer[0] = IntPoint(r.left - 10, r.bottom + 10);
- outer[1] = IntPoint(r.right + 10, r.bottom + 10);
- outer[2] = IntPoint(r.right + 10, r.top - 10);
- outer[3] = IntPoint(r.left - 10, r.top - 10);
-
- clpr.AddPath(outer, ptSubject, true);
- clpr.ReverseSolution(true);
- clpr.Execute(ctUnion, solution, pftNegative, pftNegative);
- if (solution.size() > 0) solution.erase(solution.begin());
- }
-}
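-//Illustrative usage (not part of the original file), assuming a closed
-//subject path 'subj':
-//  ClipperOffset co;
-//  co.AddPath(subj, jtRound, etClosedPolygon);
-//  Paths grown;
-//  co.Execute(grown, 10.0); //positive delta inflates, negative delta shrinks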
-//------------------------------------------------------------------------------
-
-void ClipperOffset::Execute(PolyTree& solution, double delta)
-{
- solution.Clear();
- FixOrientations();
- DoOffset(delta);
-
- //now clean up 'corners' ...
- Clipper clpr;
- clpr.AddPaths(m_destPolys, ptSubject, true);
- if (delta > 0)
- {
- clpr.Execute(ctUnion, solution, pftPositive, pftPositive);
- }
- else
- {
- IntRect r = clpr.GetBounds();
- Path outer(4);
- outer[0] = IntPoint(r.left - 10, r.bottom + 10);
- outer[1] = IntPoint(r.right + 10, r.bottom + 10);
- outer[2] = IntPoint(r.right + 10, r.top - 10);
- outer[3] = IntPoint(r.left - 10, r.top - 10);
-
- clpr.AddPath(outer, ptSubject, true);
- clpr.ReverseSolution(true);
- clpr.Execute(ctUnion, solution, pftNegative, pftNegative);
- //remove the outer PolyNode rectangle ...
- if (solution.ChildCount() == 1 && solution.Childs[0]->ChildCount() > 0)
- {
- PolyNode* outerNode = solution.Childs[0];
- solution.Childs.reserve(outerNode->ChildCount());
- solution.Childs[0] = outerNode->Childs[0];
- solution.Childs[0]->Parent = outerNode->Parent;
- for (int i = 1; i < outerNode->ChildCount(); ++i)
- solution.AddChild(*outerNode->Childs[i]);
- }
- else
- solution.Clear();
- }
-}
-//------------------------------------------------------------------------------
-
-void ClipperOffset::DoOffset(double delta)
-{
- m_destPolys.clear();
- m_delta = delta;
-
- //if Zero offset, just copy any CLOSED polygons to m_destPolys and return ...
- if (NEAR_ZERO(delta))
- {
- m_destPolys.reserve(m_polyNodes.ChildCount());
- for (int i = 0; i < m_polyNodes.ChildCount(); i++)
- {
- PolyNode& node = *m_polyNodes.Childs[i];
- if (node.m_endtype == etClosedPolygon)
- m_destPolys.push_back(node.Contour);
- }
- return;
- }
-
- //see offset_triginometry3.svg in the documentation folder ...
- if (MiterLimit > 2) m_miterLim = 2/(MiterLimit * MiterLimit);
- else m_miterLim = 0.5;
-
- double y;
- if (ArcTolerance <= 0.0) y = def_arc_tolerance;
- else if (ArcTolerance > std::fabs(delta) * def_arc_tolerance)
- y = std::fabs(delta) * def_arc_tolerance;
- else y = ArcTolerance;
- //see offset_triginometry2.svg in the documentation folder ...
- double steps = pi / std::acos(1 - y / std::fabs(delta));
- if (steps > std::fabs(delta) * pi)
- steps = std::fabs(delta) * pi; //ie excessive precision check
- m_sin = std::sin(two_pi / steps);
- m_cos = std::cos(two_pi / steps);
- m_StepsPerRad = steps / two_pi;
- if (delta < 0.0) m_sin = -m_sin;
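- //nb (illustrative note, not in the original source): with the constructor
- //default ArcTolerance = 0.25 and delta = 100, y = 0.25, giving
- //steps = pi / acos(1 - 0.25/100) ~= pi / 0.0707 ~= 44 vertices per full
- //circle, so arc smoothness scales with the offset distance.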
-
- m_destPolys.reserve(m_polyNodes.ChildCount() * 2);
- for (int i = 0; i < m_polyNodes.ChildCount(); i++)
- {
- PolyNode& node = *m_polyNodes.Childs[i];
- m_srcPoly = node.Contour;
-
- int len = (int)m_srcPoly.size();
- if (len == 0 || (delta <= 0 && (len < 3 || node.m_endtype != etClosedPolygon)))
- continue;
-
- m_destPoly.clear();
- if (len == 1)
- {
- if (node.m_jointype == jtRound)
- {
- double X = 1.0, Y = 0.0;
- for (cInt j = 1; j <= steps; j++)
- {
- m_destPoly.push_back(IntPoint(
- Round(m_srcPoly[0].X + X * delta),
- Round(m_srcPoly[0].Y + Y * delta)));
- double X2 = X;
- X = X * m_cos - m_sin * Y;
- Y = X2 * m_sin + Y * m_cos;
- }
- }
- else
- {
- double X = -1.0, Y = -1.0;
- for (int j = 0; j < 4; ++j)
- {
- m_destPoly.push_back(IntPoint(
- Round(m_srcPoly[0].X + X * delta),
- Round(m_srcPoly[0].Y + Y * delta)));
- if (X < 0) X = 1;
- else if (Y < 0) Y = 1;
- else X = -1;
- }
- }
- m_destPolys.push_back(m_destPoly);
- continue;
- }
- //build m_normals ...
- m_normals.clear();
- m_normals.reserve(len);
- for (int j = 0; j < len - 1; ++j)
- m_normals.push_back(GetUnitNormal(m_srcPoly[j], m_srcPoly[j + 1]));
- if (node.m_endtype == etClosedLine || node.m_endtype == etClosedPolygon)
- m_normals.push_back(GetUnitNormal(m_srcPoly[len - 1], m_srcPoly[0]));
- else
- m_normals.push_back(DoublePoint(m_normals[len - 2]));
-
- if (node.m_endtype == etClosedPolygon)
- {
- int k = len - 1;
- for (int j = 0; j < len; ++j)
- OffsetPoint(j, k, node.m_jointype);
- m_destPolys.push_back(m_destPoly);
- }
- else if (node.m_endtype == etClosedLine)
- {
- int k = len - 1;
- for (int j = 0; j < len; ++j)
- OffsetPoint(j, k, node.m_jointype);
- m_destPolys.push_back(m_destPoly);
- m_destPoly.clear();
- //re-build m_normals ...
- DoublePoint n = m_normals[len -1];
- for (int j = len - 1; j > 0; j--)
- m_normals[j] = DoublePoint(-m_normals[j - 1].X, -m_normals[j - 1].Y);
- m_normals[0] = DoublePoint(-n.X, -n.Y);
- k = 0;
- for (int j = len - 1; j >= 0; j--)
- OffsetPoint(j, k, node.m_jointype);
- m_destPolys.push_back(m_destPoly);
- }
- else
- {
- int k = 0;
- for (int j = 1; j < len - 1; ++j)
- OffsetPoint(j, k, node.m_jointype);
-
- IntPoint pt1;
- if (node.m_endtype == etOpenButt)
- {
- int j = len - 1;
- pt1 = IntPoint((cInt)Round(m_srcPoly[j].X + m_normals[j].X *
- delta), (cInt)Round(m_srcPoly[j].Y + m_normals[j].Y * delta));
- m_destPoly.push_back(pt1);
- pt1 = IntPoint((cInt)Round(m_srcPoly[j].X - m_normals[j].X *
- delta), (cInt)Round(m_srcPoly[j].Y - m_normals[j].Y * delta));
- m_destPoly.push_back(pt1);
- }
- else
- {
- int j = len - 1;
- k = len - 2;
- m_sinA = 0;
- m_normals[j] = DoublePoint(-m_normals[j].X, -m_normals[j].Y);
- if (node.m_endtype == etOpenSquare)
- DoSquare(j, k);
- else
- DoRound(j, k);
- }
-
- //re-build m_normals ...
- for (int j = len - 1; j > 0; j--)
- m_normals[j] = DoublePoint(-m_normals[j - 1].X, -m_normals[j - 1].Y);
- m_normals[0] = DoublePoint(-m_normals[1].X, -m_normals[1].Y);
-
- k = len - 1;
- for (int j = k - 1; j > 0; --j) OffsetPoint(j, k, node.m_jointype);
-
- if (node.m_endtype == etOpenButt)
- {
- pt1 = IntPoint((cInt)Round(m_srcPoly[0].X - m_normals[0].X * delta),
- (cInt)Round(m_srcPoly[0].Y - m_normals[0].Y * delta));
- m_destPoly.push_back(pt1);
- pt1 = IntPoint((cInt)Round(m_srcPoly[0].X + m_normals[0].X * delta),
- (cInt)Round(m_srcPoly[0].Y + m_normals[0].Y * delta));
- m_destPoly.push_back(pt1);
- }
- else
- {
- k = 1;
- m_sinA = 0;
- if (node.m_endtype == etOpenSquare)
- DoSquare(0, 1);
- else
- DoRound(0, 1);
- }
- m_destPolys.push_back(m_destPoly);
- }
- }
-}
-//------------------------------------------------------------------------------
-
-void ClipperOffset::OffsetPoint(int j, int& k, JoinType jointype)
-{
- //cross product ...
- m_sinA = (m_normals[k].X * m_normals[j].Y - m_normals[j].X * m_normals[k].Y);
- if (std::fabs(m_sinA * m_delta) < 1.0)
- {
- //dot product ...
- double cosA = (m_normals[k].X * m_normals[j].X + m_normals[j].Y * m_normals[k].Y );
- if (cosA > 0) // angle => 0 degrees
- {
- m_destPoly.push_back(IntPoint(Round(m_srcPoly[j].X + m_normals[k].X * m_delta),
- Round(m_srcPoly[j].Y + m_normals[k].Y * m_delta)));
- return;
- }
- //else angle => 180 degrees
- }
- else if (m_sinA > 1.0) m_sinA = 1.0;
- else if (m_sinA < -1.0) m_sinA = -1.0;
-
- if (m_sinA * m_delta < 0)
- {
- m_destPoly.push_back(IntPoint(Round(m_srcPoly[j].X + m_normals[k].X * m_delta),
- Round(m_srcPoly[j].Y + m_normals[k].Y * m_delta)));
- m_destPoly.push_back(m_srcPoly[j]);
- m_destPoly.push_back(IntPoint(Round(m_srcPoly[j].X + m_normals[j].X * m_delta),
- Round(m_srcPoly[j].Y + m_normals[j].Y * m_delta)));
- }
- else
- switch (jointype)
- {
- case jtMiter:
- {
- double r = 1 + (m_normals[j].X * m_normals[k].X +
- m_normals[j].Y * m_normals[k].Y);
- if (r >= m_miterLim) DoMiter(j, k, r); else DoSquare(j, k);
- break;
- }
- case jtSquare: DoSquare(j, k); break;
- case jtRound: DoRound(j, k); break;
- }
- k = j;
-}
-//------------------------------------------------------------------------------
-
-void ClipperOffset::DoSquare(int j, int k)
-{
- double dx = std::tan(std::atan2(m_sinA,
- m_normals[k].X * m_normals[j].X + m_normals[k].Y * m_normals[j].Y) / 4);
- m_destPoly.push_back(IntPoint(
- Round(m_srcPoly[j].X + m_delta * (m_normals[k].X - m_normals[k].Y * dx)),
- Round(m_srcPoly[j].Y + m_delta * (m_normals[k].Y + m_normals[k].X * dx))));
- m_destPoly.push_back(IntPoint(
- Round(m_srcPoly[j].X + m_delta * (m_normals[j].X + m_normals[j].Y * dx)),
- Round(m_srcPoly[j].Y + m_delta * (m_normals[j].Y - m_normals[j].X * dx))));
-}
-//------------------------------------------------------------------------------
-
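-//nb (illustrative note, not in the original source): in DoMiter below,
-//r = 1 + cos(A), where A is the angle between the two edge normals. Since
-//|m_normals[k] + m_normals[j]| = sqrt(2r), the pushed point lies at distance
-//delta * sqrt(2/r) = delta / cos(A/2) along the angle bisector, which is the
-//exact miter vertex.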
-void ClipperOffset::DoMiter(int j, int k, double r)
-{
- double q = m_delta / r;
- m_destPoly.push_back(IntPoint(Round(m_srcPoly[j].X + (m_normals[k].X + m_normals[j].X) * q),
- Round(m_srcPoly[j].Y + (m_normals[k].Y + m_normals[j].Y) * q)));
-}
-//------------------------------------------------------------------------------
-
-void ClipperOffset::DoRound(int j, int k)
-{
- double a = std::atan2(m_sinA,
- m_normals[k].X * m_normals[j].X + m_normals[k].Y * m_normals[j].Y);
- int steps = std::max((int)Round(m_StepsPerRad * std::fabs(a)), 1);
-
- double X = m_normals[k].X, Y = m_normals[k].Y, X2;
- for (int i = 0; i < steps; ++i)
- {
- m_destPoly.push_back(IntPoint(
- Round(m_srcPoly[j].X + X * m_delta),
- Round(m_srcPoly[j].Y + Y * m_delta)));
- X2 = X;
- X = X * m_cos - m_sin * Y;
- Y = X2 * m_sin + Y * m_cos;
- }
- m_destPoly.push_back(IntPoint(
- Round(m_srcPoly[j].X + m_normals[j].X * m_delta),
- Round(m_srcPoly[j].Y + m_normals[j].Y * m_delta)));
-}
-
-//------------------------------------------------------------------------------
-// Miscellaneous public functions
-//------------------------------------------------------------------------------
-
-void Clipper::DoSimplePolygons()
-{
- PolyOutList::size_type i = 0;
- while (i < m_PolyOuts.size())
- {
- OutRec* outrec = m_PolyOuts[i++];
- OutPt* op = outrec->Pts;
- if (!op || outrec->IsOpen) continue;
- do //for each Pt in Polygon until duplicate found do ...
- {
- OutPt* op2 = op->Next;
- while (op2 != outrec->Pts)
- {
- if ((op->Pt == op2->Pt) && op2->Next != op && op2->Prev != op)
- {
- //split the polygon into two ...
- OutPt* op3 = op->Prev;
- OutPt* op4 = op2->Prev;
- op->Prev = op4;
- op4->Next = op;
- op2->Prev = op3;
- op3->Next = op2;
-
- outrec->Pts = op;
- OutRec* outrec2 = CreateOutRec();
- outrec2->Pts = op2;
- UpdateOutPtIdxs(*outrec2);
- if (Poly2ContainsPoly1(outrec2->Pts, outrec->Pts))
- {
- //OutRec2 is contained by OutRec1 ...
- outrec2->IsHole = !outrec->IsHole;
- outrec2->FirstLeft = outrec;
- if (m_UsingPolyTree) FixupFirstLefts2(outrec2, outrec);
- }
- else
- if (Poly2ContainsPoly1(outrec->Pts, outrec2->Pts))
- {
- //OutRec1 is contained by OutRec2 ...
- outrec2->IsHole = outrec->IsHole;
- outrec->IsHole = !outrec2->IsHole;
- outrec2->FirstLeft = outrec->FirstLeft;
- outrec->FirstLeft = outrec2;
- if (m_UsingPolyTree) FixupFirstLefts2(outrec, outrec2);
- }
- else
- {
- //the 2 polygons are separate ...
- outrec2->IsHole = outrec->IsHole;
- outrec2->FirstLeft = outrec->FirstLeft;
- if (m_UsingPolyTree) FixupFirstLefts1(outrec, outrec2);
- }
- op2 = op; //ie get ready for the Next iteration
- }
- op2 = op2->Next;
- }
- op = op->Next;
- }
- while (op != outrec->Pts);
- }
-}
-//------------------------------------------------------------------------------
-
-void ReversePath(Path& p)
-{
- std::reverse(p.begin(), p.end());
-}
-//------------------------------------------------------------------------------
-
-void ReversePaths(Paths& p)
-{
- for (Paths::size_type i = 0; i < p.size(); ++i)
- ReversePath(p[i]);
-}
-//------------------------------------------------------------------------------
-
-void SimplifyPolygon(const Path &in_poly, Paths &out_polys, PolyFillType fillType)
-{
- Clipper c;
- c.StrictlySimple(true);
- c.AddPath(in_poly, ptSubject, true);
- c.Execute(ctUnion, out_polys, fillType, fillType);
-}
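-//nb (illustrative note, not in the original source): with the default
-//pftEvenOdd fill, a self-intersecting 'bowtie' such as
-//(0,0),(10,10),(10,0),(0,10) is returned as two separate triangles.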
-//------------------------------------------------------------------------------
-
-void SimplifyPolygons(const Paths &in_polys, Paths &out_polys, PolyFillType fillType)
-{
- Clipper c;
- c.StrictlySimple(true);
- c.AddPaths(in_polys, ptSubject, true);
- c.Execute(ctUnion, out_polys, fillType, fillType);
-}
-//------------------------------------------------------------------------------
-
-void SimplifyPolygons(Paths &polys, PolyFillType fillType)
-{
- SimplifyPolygons(polys, polys, fillType);
-}
-//------------------------------------------------------------------------------
-
-inline double DistanceSqrd(const IntPoint& pt1, const IntPoint& pt2)
-{
- double Dx = ((double)pt1.X - pt2.X);
- double dy = ((double)pt1.Y - pt2.Y);
- return (Dx*Dx + dy*dy);
-}
-//------------------------------------------------------------------------------
-
-double DistanceFromLineSqrd(
- const IntPoint& pt, const IntPoint& ln1, const IntPoint& ln2)
-{
- //The equation of a line in general form (Ax + By + C = 0)
- //given 2 points (x1,y1) & (x2,y2) is ...
- //(y1 - y2)x + (x2 - x1)y + (y1 - y2)x1 - (x2 - x1)y1 = 0
- //A = (y1 - y2); B = (x2 - x1); C = (y1 - y2)x1 - (x2 - x1)y1
- //perpendicular distance of point (x3,y3) = (Ax3 + By3 + C)/Sqrt(A^2 + B^2)
- //see http://en.wikipedia.org/wiki/Perpendicular_distance
- double A = double(ln1.Y - ln2.Y);
- double B = double(ln2.X - ln1.X);
- double C = A * ln1.X + B * ln1.Y;
- C = A * pt.X + B * pt.Y - C;
- return (C * C) / (A * A + B * B);
-}
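-//nb (illustrative note, not in the original source): for pt = (0,5) and the
-//line through (0,0) and (10,0): A = 0, B = 10, C evaluates to 50, and the
-//result is 2500 / 100 = 25, i.e. the squared distance 5^2.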
-//---------------------------------------------------------------------------
-
-bool SlopesNearCollinear(const IntPoint& pt1,
- const IntPoint& pt2, const IntPoint& pt3, double distSqrd)
-{
- //this function is more accurate when the point that's geometrically
- //between the other 2 points is the one that's tested for distance.
- //ie makes it more likely to pick up 'spikes' ...
- if (Abs(pt1.X - pt2.X) > Abs(pt1.Y - pt2.Y))
- {
- if ((pt1.X > pt2.X) == (pt1.X < pt3.X))
- return DistanceFromLineSqrd(pt1, pt2, pt3) < distSqrd;
- else if ((pt2.X > pt1.X) == (pt2.X < pt3.X))
- return DistanceFromLineSqrd(pt2, pt1, pt3) < distSqrd;
- else
- return DistanceFromLineSqrd(pt3, pt1, pt2) < distSqrd;
- }
- else
- {
- if ((pt1.Y > pt2.Y) == (pt1.Y < pt3.Y))
- return DistanceFromLineSqrd(pt1, pt2, pt3) < distSqrd;
- else if ((pt2.Y > pt1.Y) == (pt2.Y < pt3.Y))
- return DistanceFromLineSqrd(pt2, pt1, pt3) < distSqrd;
- else
- return DistanceFromLineSqrd(pt3, pt1, pt2) < distSqrd;
- }
-}
-//------------------------------------------------------------------------------
-
-bool PointsAreClose(IntPoint pt1, IntPoint pt2, double distSqrd)
-{
- double Dx = (double)pt1.X - pt2.X;
- double dy = (double)pt1.Y - pt2.Y;
- return ((Dx * Dx) + (dy * dy) <= distSqrd);
-}
-//------------------------------------------------------------------------------
-
-OutPt* ExcludeOp(OutPt* op)
-{
- OutPt* result = op->Prev;
- result->Next = op->Next;
- op->Next->Prev = result;
- result->Idx = 0;
- return result;
-}
-//------------------------------------------------------------------------------
-
-void CleanPolygon(const Path& in_poly, Path& out_poly, double distance)
-{
- //distance = proximity in units/pixels below which vertices
- //will be stripped. Default ~= sqrt(2).
-
- size_t size = in_poly.size();
-
- if (size == 0)
- {
- out_poly.clear();
- return;
- }
-
- OutPt* outPts = new OutPt[size];
- for (size_t i = 0; i < size; ++i)
- {
- outPts[i].Pt = in_poly[i];
- outPts[i].Next = &outPts[(i + 1) % size];
- outPts[i].Next->Prev = &outPts[i];
- outPts[i].Idx = 0;
- }
-
- double distSqrd = distance * distance;
- OutPt* op = &outPts[0];
- while (op->Idx == 0 && op->Next != op->Prev)
- {
- if (PointsAreClose(op->Pt, op->Prev->Pt, distSqrd))
- {
- op = ExcludeOp(op);
- size--;
- }
- else if (PointsAreClose(op->Prev->Pt, op->Next->Pt, distSqrd))
- {
- ExcludeOp(op->Next);
- op = ExcludeOp(op);
- size -= 2;
- }
- else if (SlopesNearCollinear(op->Prev->Pt, op->Pt, op->Next->Pt, distSqrd))
- {
- op = ExcludeOp(op);
- size--;
- }
- else
- {
- op->Idx = 1;
- op = op->Next;
- }
- }
-
- if (size < 3) size = 0;
- out_poly.resize(size);
- for (size_t i = 0; i < size; ++i)
- {
- out_poly[i] = op->Pt;
- op = op->Next;
- }
- delete [] outPts;
-}
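-//nb (illustrative note, not in the original source): with the default
-//distance of 1.415 (just over sqrt(2)), adjacent vertices differing by at
-//most one unit in both X and Y are merged, and near-collinear 'spike'
-//vertices within that distance of the adjoining edge are removed.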
-//------------------------------------------------------------------------------
-
-void CleanPolygon(Path& poly, double distance)
-{
- CleanPolygon(poly, poly, distance);
-}
-//------------------------------------------------------------------------------
-
-void CleanPolygons(const Paths& in_polys, Paths& out_polys, double distance)
-{
- out_polys.resize(in_polys.size());
- for (Paths::size_type i = 0; i < in_polys.size(); ++i)
- CleanPolygon(in_polys[i], out_polys[i], distance);
-}
-//------------------------------------------------------------------------------
-
-void CleanPolygons(Paths& polys, double distance)
-{
- CleanPolygons(polys, polys, distance);
-}
-//------------------------------------------------------------------------------
-
-void Minkowski(const Path& poly, const Path& path,
- Paths& solution, bool isSum, bool isClosed)
-{
- int delta = (isClosed ? 1 : 0);
- size_t polyCnt = poly.size();
- size_t pathCnt = path.size();
- Paths pp;
- pp.reserve(pathCnt);
- if (isSum)
- for (size_t i = 0; i < pathCnt; ++i)
- {
- Path p;
- p.reserve(polyCnt);
- for (size_t j = 0; j < poly.size(); ++j)
- p.push_back(IntPoint(path[i].X + poly[j].X, path[i].Y + poly[j].Y));
- pp.push_back(p);
- }
- else
- for (size_t i = 0; i < pathCnt; ++i)
- {
- Path p;
- p.reserve(polyCnt);
- for (size_t j = 0; j < poly.size(); ++j)
- p.push_back(IntPoint(path[i].X - poly[j].X, path[i].Y - poly[j].Y));
- pp.push_back(p);
- }
-
- solution.clear();
- solution.reserve((pathCnt + delta) * (polyCnt + 1));
- for (size_t i = 0; i < pathCnt - 1 + delta; ++i)
- for (size_t j = 0; j < polyCnt; ++j)
- {
- Path quad;
- quad.reserve(4);
- quad.push_back(pp[i % pathCnt][j % polyCnt]);
- quad.push_back(pp[(i + 1) % pathCnt][j % polyCnt]);
- quad.push_back(pp[(i + 1) % pathCnt][(j + 1) % polyCnt]);
- quad.push_back(pp[i % pathCnt][(j + 1) % polyCnt]);
- if (!Orientation(quad)) ReversePath(quad);
- solution.push_back(quad);
- }
-}
-//------------------------------------------------------------------------------
-
-void MinkowskiSum(const Path& pattern, const Path& path, Paths& solution, bool pathIsClosed)
-{
- Minkowski(pattern, path, solution, true, pathIsClosed);
- Clipper c;
- c.AddPaths(solution, ptSubject, true);
- c.Execute(ctUnion, solution, pftNonZero, pftNonZero);
-}
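-//nb (illustrative note, not in the original source): sweeping a small square
-//pattern centred on the origin along an open polyline (pathIsClosed = false)
-//thickens the line; e.g. a 2x2 square yields a band 2 units wide.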
-//------------------------------------------------------------------------------
-
-void TranslatePath(const Path& input, Path& output, const IntPoint delta)
-{
- //precondition: input != output
- output.resize(input.size());
- for (size_t i = 0; i < input.size(); ++i)
- output[i] = IntPoint(input[i].X + delta.X, input[i].Y + delta.Y);
-}
-//------------------------------------------------------------------------------
-
-void MinkowskiSum(const Path& pattern, const Paths& paths, Paths& solution, bool pathIsClosed)
-{
- Clipper c;
- for (size_t i = 0; i < paths.size(); ++i)
- {
- Paths tmp;
- Minkowski(pattern, paths[i], tmp, true, pathIsClosed);
- c.AddPaths(tmp, ptSubject, true);
- if (pathIsClosed)
- {
- Path tmp2;
- TranslatePath(paths[i], tmp2, pattern[0]);
- c.AddPath(tmp2, ptClip, true);
- }
- }
- c.Execute(ctUnion, solution, pftNonZero, pftNonZero);
-}
-//------------------------------------------------------------------------------
-
-void MinkowskiDiff(const Path& poly1, const Path& poly2, Paths& solution)
-{
- Minkowski(poly1, poly2, solution, false, true);
- Clipper c;
- c.AddPaths(solution, ptSubject, true);
- c.Execute(ctUnion, solution, pftNonZero, pftNonZero);
-}
-//------------------------------------------------------------------------------
-
-enum NodeType {ntAny, ntOpen, ntClosed};
-
-void AddPolyNodeToPaths(const PolyNode& polynode, NodeType nodetype, Paths& paths)
-{
- bool match = true;
- if (nodetype == ntClosed) match = !polynode.IsOpen();
- else if (nodetype == ntOpen) return;
-
- if (!polynode.Contour.empty() && match)
- paths.push_back(polynode.Contour);
- for (int i = 0; i < polynode.ChildCount(); ++i)
- AddPolyNodeToPaths(*polynode.Childs[i], nodetype, paths);
-}
-//------------------------------------------------------------------------------
-
-void PolyTreeToPaths(const PolyTree& polytree, Paths& paths)
-{
- paths.resize(0);
- paths.reserve(polytree.Total());
- AddPolyNodeToPaths(polytree, ntAny, paths);
-}
-//------------------------------------------------------------------------------
-
-void ClosedPathsFromPolyTree(const PolyTree& polytree, Paths& paths)
-{
- paths.resize(0);
- paths.reserve(polytree.Total());
- AddPolyNodeToPaths(polytree, ntClosed, paths);
-}
-//------------------------------------------------------------------------------
-
-void OpenPathsFromPolyTree(PolyTree& polytree, Paths& paths)
-{
- paths.resize(0);
- paths.reserve(polytree.Total());
- //Open paths are top level only, so ...
- for (int i = 0; i < polytree.ChildCount(); ++i)
- if (polytree.Childs[i]->IsOpen())
- paths.push_back(polytree.Childs[i]->Contour);
-}
-//------------------------------------------------------------------------------
-
-std::ostream& operator <<(std::ostream &s, const IntPoint &p)
-{
- s << "(" << p.X << "," << p.Y << ")";
- return s;
-}
-//------------------------------------------------------------------------------
-
-std::ostream& operator <<(std::ostream &s, const Path &p)
-{
- if (p.empty()) return s;
- Path::size_type last = p.size() -1;
- for (Path::size_type i = 0; i < last; i++)
- s << "(" << p[i].X << "," << p[i].Y << "), ";
- s << "(" << p[last].X << "," << p[last].Y << ")\n";
- return s;
-}
-//------------------------------------------------------------------------------
-
-std::ostream& operator <<(std::ostream &s, const Paths &p)
-{
- for (Paths::size_type i = 0; i < p.size(); i++)
- s << p[i];
- s << "\n";
- return s;
-}
-//------------------------------------------------------------------------------
-
-} //ClipperLib namespace
diff --git a/ppocr/postprocess/lanms/include/clipper/clipper.hpp b/ppocr/postprocess/lanms/include/clipper/clipper.hpp
deleted file mode 100644
index c595ebc3248d191157999c3c6adada4bf3fe22a3..0000000000000000000000000000000000000000
--- a/ppocr/postprocess/lanms/include/clipper/clipper.hpp
+++ /dev/null
@@ -1,404 +0,0 @@
-/*******************************************************************************
-* *
-* Author : Angus Johnson *
-* Version : 6.4.0 *
-* Date : 2 July 2015 *
-* Website : http://www.angusj.com *
-* Copyright : Angus Johnson 2010-2015 *
-* *
-* License: *
-* Use, modification & distribution is subject to Boost Software License Ver 1. *
-* http://www.boost.org/LICENSE_1_0.txt *
-* *
-* Attributions: *
-* The code in this library is an extension of Bala Vatti's clipping algorithm: *
-* "A generic solution to polygon clipping" *
-* Communications of the ACM, Vol 35, Issue 7 (July 1992) pp 56-63. *
-* http://portal.acm.org/citation.cfm?id=129906 *
-* *
-* Computer graphics and geometric modeling: implementation and algorithms *
-* By Max K. Agoston *
-* Springer; 1 edition (January 4, 2005) *
-* http://books.google.com/books?q=vatti+clipping+agoston *
-* *
-* See also: *
-* "Polygon Offsetting by Computing Winding Numbers" *
-* Paper no. DETC2005-85513 pp. 565-575 *
-* ASME 2005 International Design Engineering Technical Conferences *
-* and Computers and Information in Engineering Conference (IDETC/CIE2005) *
-* September 24-28, 2005 , Long Beach, California, USA *
-* http://www.me.berkeley.edu/~mcmains/pubs/DAC05OffsetPolygon.pdf *
-* *
-*******************************************************************************/
-
-#ifndef clipper_hpp
-#define clipper_hpp
-
-#define CLIPPER_VERSION "6.2.6"
-
-//use_int32: When enabled 32bit ints are used instead of 64bit ints. This
-//improves performance but coordinate values are limited to the range +/- 46340
-//#define use_int32
-
-//use_xyz: adds a Z member to IntPoint. Adds a minor cost to performance.
-//#define use_xyz
-
-//use_lines: Enables line clipping. Adds a very minor cost to performance.
-#define use_lines
-
-//use_deprecated: Enables temporary support for the obsolete functions
-//#define use_deprecated
-
-#include <vector>
-#include <list>
-#include <set>
-#include <stdexcept>
-#include <cstring>
-#include <cstdlib>
-#include <ostream>
-#include <functional>
-#include <queue>
-
-namespace ClipperLib {
-
-enum ClipType { ctIntersection, ctUnion, ctDifference, ctXor };
-enum PolyType { ptSubject, ptClip };
-//By far the most widely used winding rules for polygon filling are
-//EvenOdd & NonZero (GDI, GDI+, XLib, OpenGL, Cairo, AGG, Quartz, SVG, Gr32)
-//Other rules include Positive, Negative and ABS_GTR_EQ_TWO (only in OpenGL)
-//see http://glprogramming.com/red/chapter11.html
-enum PolyFillType { pftEvenOdd, pftNonZero, pftPositive, pftNegative };
-
-#ifdef use_int32
- typedef int cInt;
- static cInt const loRange = 0x7FFF;
- static cInt const hiRange = 0x7FFF;
-#else
- typedef signed long long cInt;
- static cInt const loRange = 0x3FFFFFFF;
- static cInt const hiRange = 0x3FFFFFFFFFFFFFFFLL;
- typedef signed long long long64; //used by Int128 class
- typedef unsigned long long ulong64;
-
-#endif
-
-struct IntPoint {
- cInt X;
- cInt Y;
-#ifdef use_xyz
- cInt Z;
- IntPoint(cInt x = 0, cInt y = 0, cInt z = 0): X(x), Y(y), Z(z) {};
-#else
- IntPoint(cInt x = 0, cInt y = 0): X(x), Y(y) {};
-#endif
-
- friend inline bool operator== (const IntPoint& a, const IntPoint& b)
- {
- return a.X == b.X && a.Y == b.Y;
- }
- friend inline bool operator!= (const IntPoint& a, const IntPoint& b)
- {
- return a.X != b.X || a.Y != b.Y;
- }
-};
-//------------------------------------------------------------------------------
-
-typedef std::vector< IntPoint > Path;
-typedef std::vector< Path > Paths;
-
-inline Path& operator <<(Path& poly, const IntPoint& p) {poly.push_back(p); return poly;}
-inline Paths& operator <<(Paths& polys, const Path& p) {polys.push_back(p); return polys;}
-
-std::ostream& operator <<(std::ostream &s, const IntPoint &p);
-std::ostream& operator <<(std::ostream &s, const Path &p);
-std::ostream& operator <<(std::ostream &s, const Paths &p);
-
-struct DoublePoint
-{
- double X;
- double Y;
- DoublePoint(double x = 0, double y = 0) : X(x), Y(y) {}
- DoublePoint(IntPoint ip) : X((double)ip.X), Y((double)ip.Y) {}
-};
-//------------------------------------------------------------------------------
-
-#ifdef use_xyz
-typedef void (*ZFillCallback)(IntPoint& e1bot, IntPoint& e1top, IntPoint& e2bot, IntPoint& e2top, IntPoint& pt);
-#endif
-
-enum InitOptions {ioReverseSolution = 1, ioStrictlySimple = 2, ioPreserveCollinear = 4};
-enum JoinType {jtSquare, jtRound, jtMiter};
-enum EndType {etClosedPolygon, etClosedLine, etOpenButt, etOpenSquare, etOpenRound};
-
-class PolyNode;
-typedef std::vector< PolyNode* > PolyNodes;
-
-class PolyNode
-{
-public:
- PolyNode();
- virtual ~PolyNode(){};
- Path Contour;
- PolyNodes Childs;
- PolyNode* Parent;
- PolyNode* GetNext() const;
- bool IsHole() const;
- bool IsOpen() const;
- int ChildCount() const;
-private:
- unsigned Index; //node index in Parent.Childs
- bool m_IsOpen;
- JoinType m_jointype;
- EndType m_endtype;
- PolyNode* GetNextSiblingUp() const;
- void AddChild(PolyNode& child);
- friend class Clipper; //to access Index
- friend class ClipperOffset;
-};
-
-class PolyTree: public PolyNode
-{
-public:
- ~PolyTree(){Clear();};
- PolyNode* GetFirst() const;
- void Clear();
- int Total() const;
-private:
- PolyNodes AllNodes;
- friend class Clipper; //to access AllNodes
-};
-
-bool Orientation(const Path &poly);
-double Area(const Path &poly);
-int PointInPolygon(const IntPoint &pt, const Path &path);
-
-void SimplifyPolygon(const Path &in_poly, Paths &out_polys, PolyFillType fillType = pftEvenOdd);
-void SimplifyPolygons(const Paths &in_polys, Paths &out_polys, PolyFillType fillType = pftEvenOdd);
-void SimplifyPolygons(Paths &polys, PolyFillType fillType = pftEvenOdd);
-
-void CleanPolygon(const Path& in_poly, Path& out_poly, double distance = 1.415);
-void CleanPolygon(Path& poly, double distance = 1.415);
-void CleanPolygons(const Paths& in_polys, Paths& out_polys, double distance = 1.415);
-void CleanPolygons(Paths& polys, double distance = 1.415);
-
-void MinkowskiSum(const Path& pattern, const Path& path, Paths& solution, bool pathIsClosed);
-void MinkowskiSum(const Path& pattern, const Paths& paths, Paths& solution, bool pathIsClosed);
-void MinkowskiDiff(const Path& poly1, const Path& poly2, Paths& solution);
-
-void PolyTreeToPaths(const PolyTree& polytree, Paths& paths);
-void ClosedPathsFromPolyTree(const PolyTree& polytree, Paths& paths);
-void OpenPathsFromPolyTree(PolyTree& polytree, Paths& paths);
-
-void ReversePath(Path& p);
-void ReversePaths(Paths& p);
-
-struct IntRect { cInt left; cInt top; cInt right; cInt bottom; };
-
-//enums that are used internally ...
-enum EdgeSide { esLeft = 1, esRight = 2};
-
-//forward declarations (for stuff used internally) ...
-struct TEdge;
-struct IntersectNode;
-struct LocalMinimum;
-struct OutPt;
-struct OutRec;
-struct Join;
-
-typedef std::vector < OutRec* > PolyOutList;
-typedef std::vector < TEdge* > EdgeList;
-typedef std::vector < Join* > JoinList;
-typedef std::vector < IntersectNode* > IntersectList;
-
-//------------------------------------------------------------------------------
-
-//ClipperBase is the ancestor to the Clipper class. It should not be
-//instantiated directly. This class simply abstracts the conversion of sets of
-//polygon coordinates into edge objects that are stored in a LocalMinima list.
-class ClipperBase
-{
-public:
- ClipperBase();
- virtual ~ClipperBase();
- virtual bool AddPath(const Path &pg, PolyType PolyTyp, bool Closed);
- bool AddPaths(const Paths &ppg, PolyType PolyTyp, bool Closed);
- virtual void Clear();
- IntRect GetBounds();
- bool PreserveCollinear() {return m_PreserveCollinear;};
- void PreserveCollinear(bool value) {m_PreserveCollinear = value;};
-protected:
- void DisposeLocalMinimaList();
- TEdge* AddBoundsToLML(TEdge *e, bool IsClosed);
- virtual void Reset();
- TEdge* ProcessBound(TEdge* E, bool IsClockwise);
- void InsertScanbeam(const cInt Y);
- bool PopScanbeam(cInt &Y);
- bool LocalMinimaPending();
- bool PopLocalMinima(cInt Y, const LocalMinimum *&locMin);
- OutRec* CreateOutRec();
- void DisposeAllOutRecs();
- void DisposeOutRec(PolyOutList::size_type index);
- void SwapPositionsInAEL(TEdge *edge1, TEdge *edge2);
- void DeleteFromAEL(TEdge *e);
- void UpdateEdgeIntoAEL(TEdge *&e);
-
- typedef std::vector<LocalMinimum> MinimaList;
- MinimaList::iterator m_CurrentLM;
- MinimaList m_MinimaList;
-
- bool m_UseFullRange;
- EdgeList m_edges;
- bool m_PreserveCollinear;
- bool m_HasOpenPaths;
- PolyOutList m_PolyOuts;
- TEdge *m_ActiveEdges;
-
- typedef std::priority_queue<cInt> ScanbeamList;
- ScanbeamList m_Scanbeam;
-};
-//------------------------------------------------------------------------------
-
-class Clipper : public virtual ClipperBase
-{
-public:
- Clipper(int initOptions = 0);
- bool Execute(ClipType clipType,
- Paths &solution,
- PolyFillType fillType = pftEvenOdd);
- bool Execute(ClipType clipType,
- Paths &solution,
- PolyFillType subjFillType,
- PolyFillType clipFillType);
- bool Execute(ClipType clipType,
- PolyTree &polytree,
- PolyFillType fillType = pftEvenOdd);
- bool Execute(ClipType clipType,
- PolyTree &polytree,
- PolyFillType subjFillType,
- PolyFillType clipFillType);
- bool ReverseSolution() { return m_ReverseOutput; };
- void ReverseSolution(bool value) {m_ReverseOutput = value;};
- bool StrictlySimple() {return m_StrictSimple;};
- void StrictlySimple(bool value) {m_StrictSimple = value;};
- //set the callback function for z value filling on intersections (otherwise Z is 0)
-#ifdef use_xyz
- void ZFillFunction(ZFillCallback zFillFunc);
-#endif
-protected:
- virtual bool ExecuteInternal();
-private:
- JoinList m_Joins;
- JoinList m_GhostJoins;
- IntersectList m_IntersectList;
- ClipType m_ClipType;
- typedef std::list<cInt> MaximaList;
- MaximaList m_Maxima;
- TEdge *m_SortedEdges;
- bool m_ExecuteLocked;
- PolyFillType m_ClipFillType;
- PolyFillType m_SubjFillType;
- bool m_ReverseOutput;
- bool m_UsingPolyTree;
- bool m_StrictSimple;
-#ifdef use_xyz
- ZFillCallback m_ZFill; //custom callback
-#endif
- void SetWindingCount(TEdge& edge);
- bool IsEvenOddFillType(const TEdge& edge) const;
- bool IsEvenOddAltFillType(const TEdge& edge) const;
- void InsertLocalMinimaIntoAEL(const cInt botY);
- void InsertEdgeIntoAEL(TEdge *edge, TEdge* startEdge);
- void AddEdgeToSEL(TEdge *edge);
- bool PopEdgeFromSEL(TEdge *&edge);
- void CopyAELToSEL();
- void DeleteFromSEL(TEdge *e);
- void SwapPositionsInSEL(TEdge *edge1, TEdge *edge2);
- bool IsContributing(const TEdge& edge) const;
- bool IsTopHorz(const cInt XPos);
- void DoMaxima(TEdge *e);
- void ProcessHorizontals();
- void ProcessHorizontal(TEdge *horzEdge);
- void AddLocalMaxPoly(TEdge *e1, TEdge *e2, const IntPoint &pt);
- OutPt* AddLocalMinPoly(TEdge *e1, TEdge *e2, const IntPoint &pt);
- OutRec* GetOutRec(int idx);
- void AppendPolygon(TEdge *e1, TEdge *e2);
- void IntersectEdges(TEdge *e1, TEdge *e2, IntPoint &pt);
- OutPt* AddOutPt(TEdge *e, const IntPoint &pt);
- OutPt* GetLastOutPt(TEdge *e);
- bool ProcessIntersections(const cInt topY);
- void BuildIntersectList(const cInt topY);
- void ProcessIntersectList();
- void ProcessEdgesAtTopOfScanbeam(const cInt topY);
- void BuildResult(Paths& polys);
- void BuildResult2(PolyTree& polytree);
- void SetHoleState(TEdge *e, OutRec *outrec);
- void DisposeIntersectNodes();
- bool FixupIntersectionOrder();
- void FixupOutPolygon(OutRec &outrec);
- void FixupOutPolyline(OutRec &outrec);
- bool IsHole(TEdge *e);
- bool FindOwnerFromSplitRecs(OutRec &outRec, OutRec *&currOrfl);
- void FixHoleLinkage(OutRec &outrec);
- void AddJoin(OutPt *op1, OutPt *op2, const IntPoint offPt);
- void ClearJoins();
- void ClearGhostJoins();
- void AddGhostJoin(OutPt *op, const IntPoint offPt);
- bool JoinPoints(Join *j, OutRec* outRec1, OutRec* outRec2);
- void JoinCommonEdges();
- void DoSimplePolygons();
- void FixupFirstLefts1(OutRec* OldOutRec, OutRec* NewOutRec);
- void FixupFirstLefts2(OutRec* InnerOutRec, OutRec* OuterOutRec);
- void FixupFirstLefts3(OutRec* OldOutRec, OutRec* NewOutRec);
-#ifdef use_xyz
- void SetZ(IntPoint& pt, TEdge& e1, TEdge& e2);
-#endif
-};
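-//Typical usage (illustrative note, not part of the original header), assuming
-//two closed input paths 'subject' and 'clip':
-//  Clipper c;
-//  c.AddPath(subject, ptSubject, true);
-//  c.AddPath(clip, ptClip, true);
-//  Paths solution;
-//  c.Execute(ctIntersection, solution, pftEvenOdd, pftEvenOdd);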
-//------------------------------------------------------------------------------
-
-class ClipperOffset
-{
-public:
- ClipperOffset(double miterLimit = 2.0, double roundPrecision = 0.25);
- ~ClipperOffset();
- void AddPath(const Path& path, JoinType joinType, EndType endType);
- void AddPaths(const Paths& paths, JoinType joinType, EndType endType);
- void Execute(Paths& solution, double delta);
- void Execute(PolyTree& solution, double delta);
- void Clear();
- double MiterLimit;
- double ArcTolerance;
-private:
- Paths m_destPolys;
- Path m_srcPoly;
- Path m_destPoly;
- std::vector<DoublePoint> m_normals;
- double m_delta, m_sinA, m_sin, m_cos;
- double m_miterLim, m_StepsPerRad;
- IntPoint m_lowest;
- PolyNode m_polyNodes;
-
- void FixOrientations();
- void DoOffset(double delta);
- void OffsetPoint(int j, int& k, JoinType jointype);
- void DoSquare(int j, int k);
- void DoMiter(int j, int k, double r);
- void DoRound(int j, int k);
-};
-//------------------------------------------------------------------------------
-
-class clipperException : public std::exception
-{
- public:
- clipperException(const char* description): m_descr(description) {}
- virtual ~clipperException() throw() {}
- virtual const char* what() const throw() {return m_descr.c_str();}
- private:
- std::string m_descr;
-};
-//------------------------------------------------------------------------------
-
-} //ClipperLib namespace
-
-#endif //clipper_hpp
-
-
diff --git a/ppocr/postprocess/lanms/include/pybind11/attr.h b/ppocr/postprocess/lanms/include/pybind11/attr.h
deleted file mode 100644
index b4137cb2bd8bb39a77b27b5450d3828a4aacf8a6..0000000000000000000000000000000000000000
--- a/ppocr/postprocess/lanms/include/pybind11/attr.h
+++ /dev/null
@@ -1,471 +0,0 @@
-/*
- pybind11/attr.h: Infrastructure for processing custom
- type and function attributes
-
- Copyright (c) 2016 Wenzel Jakob
-
- All rights reserved. Use of this source code is governed by a
- BSD-style license that can be found in the LICENSE file.
-*/
-
-#pragma once
-
-#include "cast.h"
-
-NAMESPACE_BEGIN(pybind11)
-
-/// \addtogroup annotations
-/// @{
-
-/// Annotation for methods
-struct is_method { handle class_; is_method(const handle &c) : class_(c) { } };
-
-/// Annotation for operators
-struct is_operator { };
-
-/// Annotation for parent scope
-struct scope { handle value; scope(const handle &s) : value(s) { } };
-
-/// Annotation for documentation
-struct doc { const char *value; doc(const char *value) : value(value) { } };
-
-/// Annotation for function names
-struct name { const char *value; name(const char *value) : value(value) { } };
-
-/// Annotation indicating that a function is an overload associated with a given "sibling"
-struct sibling { handle value; sibling(const handle &value) : value(value.ptr()) { } };
-
-/// Annotation indicating that a class derives from another given type
-template <typename T> struct base {
- PYBIND11_DEPRECATED("base<T>() was deprecated in favor of specifying 'T' as a template argument to class_")
- base() { }
-};
-
-/// Keep patient alive while nurse lives
-template <size_t Nurse, size_t Patient> struct keep_alive { };
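-// For example, on a bound method, `py::keep_alive<1, 2>()` keeps argument 2
-// alive at least as long as argument 1 (the implicit `self`). The class and
-// method below are placeholders, not part of this header:
-//
-//   cls.def("set_parent", &Child::set_parent, py::keep_alive<1, 2>());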
-
-/// Annotation indicating that a class is involved in a multiple inheritance relationship
-struct multiple_inheritance { };
-
-/// Annotation which enables dynamic attributes, i.e. adds `__dict__` to a class
-struct dynamic_attr { };
-
-/// Annotation which enables the buffer protocol for a type
-struct buffer_protocol { };
-
-/// Annotation which requests that a special metaclass is created for a type
-struct metaclass {
- handle value;
-
- PYBIND11_DEPRECATED("py::metaclass() is no longer required. It's turned on by default now.")
- metaclass() {}
-
- /// Override pybind11's default metaclass
- explicit metaclass(handle value) : value(value) { }
-};
-
-/// Annotation to mark enums as an arithmetic type
-struct arithmetic { };
-
-/** \rst
- A call policy which places one or more guard variables (``Ts...``) around the function call.
-
- For example, this definition:
-
- .. code-block:: cpp
-
- m.def("foo", foo, py::call_guard());
-
- is equivalent to the following pseudocode:
-
- .. code-block:: cpp
-
- m.def("foo", [](args...) {
- T scope_guard;
- return foo(args...); // forwarded arguments
- });
- \endrst */
-template <typename... Ts> struct call_guard;
-
-template <> struct call_guard<> { using type = detail::void_type; };
-
-template <typename T>
-struct call_guard<T> {
- static_assert(std::is_default_constructible<T>::value,
- "The guard type must be default constructible");
-
- using type = T;
-};
-
-template <typename T, typename... Ts>
-struct call_guard<T, Ts...> {
- struct type {
- T guard{}; // Compose multiple guard types with left-to-right default-constructor order
- typename call_guard<Ts...>::type next{};
- };
-};
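-// A concrete (illustrative) use of the machinery above: guards compose left
-// to right, so releasing the GIL and then taking a hypothetical user-defined
-// lock around every call to `foo` would look like
-//
-//   m.def("foo", &foo, py::call_guard<py::gil_scoped_release, MyLock>());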
-
-/// @} annotations
-
-NAMESPACE_BEGIN(detail)
-/* Forward declarations */
-enum op_id : int;
-enum op_type : int;
-struct undefined_t;
-template <op_id id, op_type ot, typename L, typename R> struct op_;
-template <typename... Args> struct init;
-template <typename... Args> struct init_alias;
-inline void keep_alive_impl(size_t Nurse, size_t Patient, function_call &call, handle ret);
-
-/// Internal data structure which holds metadata about a keyword argument
-struct argument_record {
- const char *name; ///< Argument name
- const char *descr; ///< Human-readable version of the argument value
- handle value; ///< Associated Python object
- bool convert : 1; ///< True if the argument is allowed to convert when loading
- bool none : 1; ///< True if None is allowed when loading
-
- argument_record(const char *name, const char *descr, handle value, bool convert, bool none)
- : name(name), descr(descr), value(value), convert(convert), none(none) { }
-};
-
-/// Internal data structure which holds metadata about a bound function (signature, overloads, etc.)
-struct function_record {
- function_record()
- : is_constructor(false), is_stateless(false), is_operator(false),
- has_args(false), has_kwargs(false), is_method(false) { }
-
- /// Function name
- char *name = nullptr; /* why no C++ strings? They generate heavier code.. */
-
- // User-specified documentation string
- char *doc = nullptr;
-
- /// Human-readable version of the function signature
- char *signature = nullptr;
-
- /// List of registered keyword arguments
- std::vector<argument_record> args;
-
- /// Pointer to lambda function which converts arguments and performs the actual call
- handle (*impl) (function_call &) = nullptr;
-
- /// Storage for the wrapped function pointer and captured data, if any
- void *data[3] = { };
-
- /// Pointer to custom destructor for 'data' (if needed)
- void (*free_data) (function_record *ptr) = nullptr;
-
- /// Return value policy associated with this function
- return_value_policy policy = return_value_policy::automatic;
-
- /// True if name == '__init__'
- bool is_constructor : 1;
-
- /// True if this is a stateless function pointer
- bool is_stateless : 1;
-
- /// True if this is an operator (__add__), etc.
- bool is_operator : 1;
-
- /// True if the function has a '*args' argument
- bool has_args : 1;
-
- /// True if the function has a '**kwargs' argument
- bool has_kwargs : 1;
-
- /// True if this is a method
- bool is_method : 1;
-
- /// Number of arguments (including py::args and/or py::kwargs, if present)
- std::uint16_t nargs;
-
- /// Python method object
- PyMethodDef *def = nullptr;
-
- /// Python handle to the parent scope (a class or a module)
- handle scope;
-
- /// Python handle to the sibling function representing an overload chain
- handle sibling;
-
- /// Pointer to next overload
- function_record *next = nullptr;
-};
-
-/// Special data structure which (temporarily) holds metadata about a bound class
-struct type_record {
- PYBIND11_NOINLINE type_record()
- : multiple_inheritance(false), dynamic_attr(false), buffer_protocol(false) { }
-
- /// Handle to the parent scope
- handle scope;
-
- /// Name of the class
- const char *name = nullptr;
-
- // Pointer to RTTI type_info data structure
- const std::type_info *type = nullptr;
-
- /// How large is the underlying C++ type?
- size_t type_size = 0;
-
- /// How large is the type's holder?
- size_t holder_size = 0;
-
- /// The global operator new can be overridden with a class-specific variant
- void *(*operator_new)(size_t) = ::operator new;
-
- /// Function pointer to class_<..>::init_instance
- void (*init_instance)(instance *, const void *) = nullptr;
-
- /// Function pointer to class_<..>::dealloc
- void (*dealloc)(const detail::value_and_holder &) = nullptr;
-
- /// List of base classes of the newly created type
- list bases;
-
- /// Optional docstring
- const char *doc = nullptr;
-
- /// Custom metaclass (optional)
- handle metaclass;
-
- /// Multiple inheritance marker
- bool multiple_inheritance : 1;
-
- /// Does the class manage a __dict__?
- bool dynamic_attr : 1;
-
- /// Does the class implement the buffer protocol?
- bool buffer_protocol : 1;
-
- /// Is the default (unique_ptr) holder type used?
- bool default_holder : 1;
-
- PYBIND11_NOINLINE void add_base(const std::type_info &base, void *(*caster)(void *)) {
- auto base_info = detail::get_type_info(base, false);
- if (!base_info) {
- std::string tname(base.name());
- detail::clean_type_id(tname);
- pybind11_fail("generic_type: type \"" + std::string(name) +
- "\" referenced unknown base type \"" + tname + "\"");
- }
-
- if (default_holder != base_info->default_holder) {
- std::string tname(base.name());
- detail::clean_type_id(tname);
- pybind11_fail("generic_type: type \"" + std::string(name) + "\" " +
- (default_holder ? "does not have" : "has") +
- " a non-default holder type while its base \"" + tname + "\" " +
- (base_info->default_holder ? "does not" : "does"));
- }
-
- bases.append((PyObject *) base_info->type);
-
- if (base_info->type->tp_dictoffset != 0)
- dynamic_attr = true;
-
- if (caster)
- base_info->implicit_casts.emplace_back(type, caster);
- }
-};
-
-inline function_call::function_call(function_record &f, handle p) :
- func(f), parent(p) {
- args.reserve(f.nargs);
- args_convert.reserve(f.nargs);
-}
-
-/**
- * Partial template specializations to process custom attributes provided to
- * cpp_function_ and class_. These are either used to initialize the respective
- * fields in the type_record and function_record data structures or executed at
- * runtime to deal with custom call policies (e.g. keep_alive).
- */
-template <typename T, typename SFINAE = void> struct process_attribute;
-
-template <typename T> struct process_attribute_default {
- /// Default implementation: do nothing
- static void init(const T &, function_record *) { }
- static void init(const T &, type_record *) { }
- static void precall(function_call &) { }
- static void postcall(function_call &, handle) { }
-};
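-// A hypothetical custom annotation would follow the same pattern: an empty
-// tag struct plus a process_attribute specialization that records it. This
-// sketch is illustrative and not part of the original header:
-//
-//   struct my_flag { };
-//   template <> struct process_attribute<my_flag>
-//       : process_attribute_default<my_flag> {
-//       static void init(const my_flag &, function_record *r) { /* ... */ }
-//   };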
-
-/// Process an attribute specifying the function's name
-template <> struct process_attribute<name> : process_attribute_default<name> {
- static void init(const name &n, function_record *r) { r->name = const_cast<char *>(n.value); }
-};
-
-/// Process an attribute specifying the function's docstring
-template <> struct process_attribute<doc> : process_attribute_default<doc> {
- static void init(const doc &n, function_record *r) { r->doc = const_cast<char *>(n.value); }
-};
-
-/// Process an attribute specifying the function's docstring (provided as a C-style string)
-template <> struct process_attribute<const char *> : process_attribute_default<const char *> {
- static void init(const char *d, function_record *r) { r->doc = const_cast<char *>(d); }
- static void init(const char *d, type_record *r) { r->doc = const_cast<char *>(d); }
-};
-template <> struct process_attribute<char *> : process_attribute<const char *> { };
-
-/// Process an attribute indicating the function's return value policy
-template <> struct process_attribute<return_value_policy> : process_attribute_default<return_value_policy> {
- static void init(const return_value_policy &p, function_record *r) { r->policy = p; }
-};
-
-/// Process an attribute which indicates that this is an overloaded function associated with a given sibling
-template <> struct process_attribute<sibling> : process_attribute_default<sibling> {
- static void init(const sibling &s, function_record *r) { r->sibling = s.value; }
-};
-
-/// Process an attribute which indicates that this function is a method
-template <> struct process_attribute<is_method> : process_attribute_default<is_method> {
- static void init(const is_method &s, function_record *r) { r->is_method = true; r->scope = s.class_; }
-};
-
-/// Process an attribute which indicates the parent scope of a method
-template <> struct process_attribute<scope> : process_attribute_default<scope> {
- static void init(const scope &s, function_record *r) { r->scope = s.value; }
-};
-
-/// Process an attribute which indicates that this function is an operator
-template <> struct process_attribute<is_operator> : process_attribute_default<is_operator> {
- static void init(const is_operator &, function_record *r) { r->is_operator = true; }
-};
-
-/// Process a keyword argument attribute (*without* a default value)
-template <> struct process_attribute<arg> : process_attribute_default<arg> {
- static void init(const arg &a, function_record *r) {
- if (r->is_method && r->args.empty())
- r->args.emplace_back("self", nullptr, handle(), true /*convert*/, false /*none not allowed*/);
- r->args.emplace_back(a.name, nullptr, handle(), !a.flag_noconvert, a.flag_none);
- }
-};
-
-/// Process a keyword argument attribute (*with* a default value)
-template <> struct process_attribute<arg_v> : process_attribute_default<arg_v> {
- static void init(const arg_v &a, function_record *r) {
- if (r->is_method && r->args.empty())
- r->args.emplace_back("self", nullptr /*descr*/, handle() /*parent*/, true /*convert*/, false /*none not allowed*/);
-
- if (!a.value) {
-#if !defined(NDEBUG)
- std::string descr("'");
- if (a.name) descr += std::string(a.name) + ": ";
- descr += a.type + "'";
- if (r->is_method) {
- if (r->name)
- descr += " in method '" + (std::string) str(r->scope) + "." + (std::string) r->name + "'";
- else
- descr += " in method of '" + (std::string) str(r->scope) + "'";
- } else if (r->name) {
- descr += " in function '" + (std::string) r->name + "'";
- }
- pybind11_fail("arg(): could not convert default argument "
- + descr + " into a Python object (type not registered yet?)");
-#else
- pybind11_fail("arg(): could not convert default argument "
- "into a Python object (type not registered yet?). "
- "Compile in debug mode for more information.");
-#endif
- }
- r->args.emplace_back(a.name, a.descr, a.value.inc_ref(), !a.flag_noconvert, a.flag_none);
- }
-};
-
-/// Process a parent class attribute. Single inheritance only (class_ itself already guarantees that)
-template <typename T>
-struct process_attribute<T, enable_if_t<is_pyobject<T>::value>> : process_attribute_default<handle> {
- static void init(const handle &h, type_record *r) { r->bases.append(h); }
-};
-
-/// Process a parent class attribute (deprecated, does not support multiple inheritance)
-template <typename T>
-struct process_attribute<base<T>> : process_attribute_default<base<T>> {
- static void init(const base<T> &, type_record *r) { r->add_base(typeid(T), nullptr); }
-};
-
-/// Process a multiple inheritance attribute
-template <>
-struct process_attribute<multiple_inheritance> : process_attribute_default<multiple_inheritance> {
- static void init(const multiple_inheritance &, type_record *r) { r->multiple_inheritance = true; }
-};
-
-template <>
-struct process_attribute<dynamic_attr> : process_attribute_default<dynamic_attr> {
- static void init(const dynamic_attr &, type_record *r) { r->dynamic_attr = true; }
-};
-
-template <>
-struct process_attribute<buffer_protocol> : process_attribute_default<buffer_protocol> {
- static void init(const buffer_protocol &, type_record *r) { r->buffer_protocol = true; }
-};
-
-template <>
-struct process_attribute<metaclass> : process_attribute_default<metaclass> {
- static void init(const metaclass &m, type_record *r) { r->metaclass = m.value; }
-};
-
-
-/// Process an 'arithmetic' attribute for enums (does nothing here)
-template <>
-struct process_attribute<arithmetic> : process_attribute_default<arithmetic> {};
-
-template <typename... Ts>
-struct process_attribute<call_guard<Ts...>> : process_attribute_default<call_guard<Ts...>> { };
-
-/**
- * Process a keep_alive call policy -- invokes keep_alive_impl during the
- * pre-call handler if both Nurse, Patient != 0 and use the post-call handler
- * otherwise
- */
-template <size_t Nurse, size_t Patient> struct process_attribute<keep_alive<Nurse, Patient>> : public process_attribute_default<keep_alive<Nurse, Patient>> {
- template <size_t N = Nurse, size_t P = Patient, enable_if_t<N != 0 && P != 0, int> = 0>
- static void precall(function_call &call) { keep_alive_impl(Nurse, Patient, call, handle()); }
- template <size_t N = Nurse, size_t P = Patient, enable_if_t<N != 0 && P != 0, int> = 0>
- static void postcall(function_call &, handle) { }
- template <size_t N = Nurse, size_t P = Patient, enable_if_t<N == 0 || P == 0, int> = 0>
- static void precall(function_call &) { }
- template <size_t N = Nurse, size_t P = Patient, enable_if_t<N == 0 || P == 0, int> = 0>
- static void postcall(function_call &call, handle ret) { keep_alive_impl(Nurse, Patient, call, ret); }
-};
-
-/// Recursively iterate over variadic template arguments
-template <typename... Args> struct process_attributes {
- static void init(const Args&... args, function_record *r) {
- int unused[] = { 0, (process_attribute<typename std::decay<Args>::type>::init(args, r), 0) ... };
- ignore_unused(unused);
- }
- static void init(const Args&... args, type_record *r) {
- int unused[] = { 0, (process_attribute<typename std::decay<Args>::type>::init(args, r), 0) ... };
- ignore_unused(unused);
- }
- static void precall(function_call &call) {
- int unused[] = { 0, (process_attribute<typename std::decay<Args>::type>::precall(call), 0) ... };
- ignore_unused(unused);
- }
- static void postcall(function_call &call, handle fn_ret) {
- int unused[] = { 0, (process_attribute<typename std::decay<Args>::type>::postcall(call, fn_ret), 0) ... };
- ignore_unused(unused);
- }
-};
-
-template <typename T>
-using is_call_guard = is_instantiation<call_guard, T>;
-
-/// Extract the ``type`` from the first `call_guard` in `Extras...` (or `void_type` if none found)
-template <typename... Extra>
-using extract_guard_t = typename exactly_one_t<is_call_guard, call_guard<>, Extra...>::type;
-
-/// Check the number of named arguments at compile time
-template <typename... Extra, size_t named = constexpr_sum(std::is_base_of<arg, Extra>::value...),
- size_t self = constexpr_sum(std::is_same<is_method, Extra>::value...)>
-constexpr bool expected_num_args(size_t nargs, bool has_args, bool has_kwargs) {
- return named == 0 || (self + named + has_args + has_kwargs) == nargs;
-}
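-// e.g. binding a free function as m.def("f", &f, py::arg("x"), py::arg("y"))
-// gives named == 2 and self == 0, so the check passes only when nargs == 2
-// (there being no py::args/py::kwargs to absorb extras).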
-
-NAMESPACE_END(detail)
-NAMESPACE_END(pybind11)
diff --git a/ppocr/postprocess/lanms/include/pybind11/buffer_info.h b/ppocr/postprocess/lanms/include/pybind11/buffer_info.h
deleted file mode 100644
index 6d1167d212c4e490a24407b63a21efe447f0a81a..0000000000000000000000000000000000000000
--- a/ppocr/postprocess/lanms/include/pybind11/buffer_info.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- pybind11/buffer_info.h: Python buffer object interface
-
- Copyright (c) 2016 Wenzel Jakob
-
- All rights reserved. Use of this source code is governed by a
- BSD-style license that can be found in the LICENSE file.
-*/
-
-#pragma once
-
-#include "common.h"
-
-NAMESPACE_BEGIN(pybind11)
-
-/// Information record describing a Python buffer object
-struct buffer_info {
- void *ptr = nullptr; // Pointer to the underlying storage
- ssize_t itemsize = 0; // Size of individual items in bytes
- ssize_t size = 0; // Total number of entries
- std::string format; // For homogeneous buffers, this should be set to format_descriptor<T>::format()
- ssize_t ndim = 0; // Number of dimensions
- std::vector<ssize_t> shape; // Shape of the tensor (1 entry per dimension)
- std::vector<ssize_t> strides; // Number of entries between adjacent entries (for each per dimension)
-
- buffer_info() { }
-
- buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim,
- detail::any_container<ssize_t> shape_in, detail::any_container<ssize_t> strides_in)
- : ptr(ptr), itemsize(itemsize), size(1), format(format), ndim(ndim),
- shape(std::move(shape_in)), strides(std::move(strides_in)) {
- if (ndim != (ssize_t) shape.size() || ndim != (ssize_t) strides.size())
- pybind11_fail("buffer_info: ndim doesn't match shape and/or strides length");
- for (size_t i = 0; i < (size_t) ndim; ++i)
- size *= shape[i];
- }
-
- template <typename T>
- buffer_info(T *ptr, detail::any_container<ssize_t> shape_in, detail::any_container<ssize_t> strides_in)
- : buffer_info(private_ctr_tag(), ptr, sizeof(T), format_descriptor<T>::format(), static_cast<ssize_t>(shape_in->size()), std::move(shape_in), std::move(strides_in)) { }
-
- buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t size)
- : buffer_info(ptr, itemsize, format, 1, {size}, {itemsize}) { }
-
- template <typename T>
- buffer_info(T *ptr, ssize_t size)
- : buffer_info(ptr, sizeof(T), format_descriptor<T>::format(), size) { }
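-
- // Illustration (not in the original header): a dense row-major 2-D float
- // buffer of shape (rows, cols) would be described as
- //
- //   buffer_info(data, sizeof(float), format_descriptor<float>::format(),
- //               2, { rows, cols },
- //               { sizeof(float) * cols, sizeof(float) });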
-
- explicit buffer_info(Py_buffer *view, bool ownview = true)
- : buffer_info(view->buf, view->itemsize, view->format, view->ndim,
- {view->shape, view->shape + view->ndim}, {view->strides, view->strides + view->ndim}) {
- this->view = view;
- this->ownview = ownview;
- }
-
- buffer_info(const buffer_info &) = delete;
- buffer_info& operator=(const buffer_info &) = delete;
-
- buffer_info(buffer_info &&other) {
- (*this) = std::move(other);
- }
-
- buffer_info& operator=(buffer_info &&rhs) {
- ptr = rhs.ptr;
- itemsize = rhs.itemsize;
- size = rhs.size;
- format = std::move(rhs.format);
- ndim = rhs.ndim;
- shape = std::move(rhs.shape);
- strides = std::move(rhs.strides);
- std::swap(view, rhs.view);
- std::swap(ownview, rhs.ownview);
- return *this;
- }
-
- ~buffer_info() {
- if (view && ownview) { PyBuffer_Release(view); delete view; }
- }
-
-private:
- struct private_ctr_tag { };
-
- buffer_info(private_ctr_tag, void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim,
- detail::any_container<ssize_t> &&shape_in, detail::any_container<ssize_t> &&strides_in)
- : buffer_info(ptr, itemsize, format, ndim, std::move(shape_in), std::move(strides_in)) { }
-
- Py_buffer *view = nullptr;
- bool ownview = false;
-};
-
-NAMESPACE_BEGIN(detail)
-
-template <typename T, typename SFINAE = void> struct compare_buffer_info {
- static bool compare(const buffer_info& b) {
- return b.format == format_descriptor<T>::format() && b.itemsize == (ssize_t) sizeof(T);
- }
-};
-
-template <typename T> struct compare_buffer_info<T, detail::enable_if_t<std::is_integral<T>::value>> {
- static bool compare(const buffer_info& b) {
- return (size_t) b.itemsize == sizeof(T) && (b.format == format_descriptor<T>::value ||
- ((sizeof(T) == sizeof(long)) && b.format == (std::is_unsigned<T>::value ? "L" : "l")) ||
- ((sizeof(T) == sizeof(size_t)) && b.format == (std::is_unsigned<T>::value ? "N" : "n")));
- }
-};
-
-NAMESPACE_END(detail)
-NAMESPACE_END(pybind11)
diff --git a/ppocr/postprocess/lanms/include/pybind11/cast.h b/ppocr/postprocess/lanms/include/pybind11/cast.h
deleted file mode 100644
index 5db03e2f7b36d0ef8529e93f00cce7d0392b2b49..0000000000000000000000000000000000000000
--- a/ppocr/postprocess/lanms/include/pybind11/cast.h
+++ /dev/null
@@ -1,2058 +0,0 @@
-/*
- pybind11/cast.h: Partial template specializations to cast between
- C++ and Python types
-
- Copyright (c) 2016 Wenzel Jakob
-
- All rights reserved. Use of this source code is governed by a
- BSD-style license that can be found in the LICENSE file.
-*/
-
-#pragma once
-
-#include "pytypes.h"
-#include "typeid.h"
-#include "descr.h"
-#include <array>
-#include <limits>
-#include <tuple>
-
-#if defined(PYBIND11_CPP17)
-# if defined(__has_include)
-# if __has_include(<string_view>)
-# define PYBIND11_HAS_STRING_VIEW
-# endif
-# elif defined(_MSC_VER)
-# define PYBIND11_HAS_STRING_VIEW
-# endif
-#endif
-#ifdef PYBIND11_HAS_STRING_VIEW
-#include <string_view>
-#endif
-
-NAMESPACE_BEGIN(pybind11)
-NAMESPACE_BEGIN(detail)
-// Forward declarations:
-inline PyTypeObject *make_static_property_type();
-inline PyTypeObject *make_default_metaclass();
-inline PyObject *make_object_base_type(PyTypeObject *metaclass);
-struct value_and_holder;
-
-/// Additional type information which does not fit into the PyTypeObject
-struct type_info {
- PyTypeObject *type;
- const std::type_info *cpptype;
- size_t type_size, holder_size_in_ptrs;
- void *(*operator_new)(size_t);
- void (*init_instance)(instance *, const void *);
- void (*dealloc)(const value_and_holder &v_h);
- std::vector<bool (*)(PyObject *, void *&)> implicit_conversions;
- std::vector<std::pair<const std::type_info *, void *(*)(void *)>> implicit_casts;
- std::vector<bool (*)(PyObject *, void *&)> *direct_conversions;
- buffer_info *(*get_buffer)(PyObject *, void *) = nullptr;
- void *get_buffer_data = nullptr;
- /* A simple type never occurs as a (direct or indirect) parent
- * of a class that makes use of multiple inheritance */
- bool simple_type : 1;
- /* True if there is no multiple inheritance in this type's inheritance tree */
- bool simple_ancestors : 1;
- /* for base vs derived holder_type checks */
- bool default_holder : 1;
-};
-
-// Store the static internals pointer in a version-specific function so that we're guaranteed it
-// will be distinct for modules compiled for different pybind11 versions. Without this, some
-// compilers (i.e. gcc) can use the same static pointer storage location across different .so's,
-// even though the `get_internals()` function itself is local to each shared object.
-template <int = PYBIND11_VERSION_MAJOR, int = PYBIND11_VERSION_MINOR>
-internals *&get_internals_ptr() { static internals *internals_ptr = nullptr; return internals_ptr; }
-
-PYBIND11_NOINLINE inline internals &get_internals() {
- internals *&internals_ptr = get_internals_ptr();
- if (internals_ptr)
- return *internals_ptr;
- handle builtins(PyEval_GetBuiltins());
- const char *id = PYBIND11_INTERNALS_ID;
- if (builtins.contains(id) && isinstance<capsule>(builtins[id])) {
- internals_ptr = *static_cast<internals **>(capsule(builtins[id]));
- } else {
- internals_ptr = new internals();
- #if defined(WITH_THREAD)
- PyEval_InitThreads();
- PyThreadState *tstate = PyThreadState_Get();
- internals_ptr->tstate = PyThread_create_key();
- PyThread_set_key_value(internals_ptr->tstate, tstate);
- internals_ptr->istate = tstate->interp;
- #endif
- builtins[id] = capsule(&internals_ptr);
- internals_ptr->registered_exception_translators.push_front(
- [](std::exception_ptr p) -> void {
- try {
- if (p) std::rethrow_exception(p);
- } catch (error_already_set &e) { e.restore(); return;
- } catch (const builtin_exception &e) { e.set_error(); return;
- } catch (const std::bad_alloc &e) { PyErr_SetString(PyExc_MemoryError, e.what()); return;
- } catch (const std::domain_error &e) { PyErr_SetString(PyExc_ValueError, e.what()); return;
- } catch (const std::invalid_argument &e) { PyErr_SetString(PyExc_ValueError, e.what()); return;
- } catch (const std::length_error &e) { PyErr_SetString(PyExc_ValueError, e.what()); return;
- } catch (const std::out_of_range &e) { PyErr_SetString(PyExc_IndexError, e.what()); return;
- } catch (const std::range_error &e) { PyErr_SetString(PyExc_ValueError, e.what()); return;
- } catch (const std::exception &e) { PyErr_SetString(PyExc_RuntimeError, e.what()); return;
- } catch (...) {
- PyErr_SetString(PyExc_RuntimeError, "Caught an unknown exception!");
- return;
- }
- }
- );
- internals_ptr->static_property_type = make_static_property_type();
- internals_ptr->default_metaclass = make_default_metaclass();
- internals_ptr->instance_base = make_object_base_type(internals_ptr->default_metaclass);
- }
- return *internals_ptr;
-}
-
-/// A life support system for temporary objects created by `type_caster::load()`.
-/// Adding a patient will keep it alive up until the enclosing function returns.
-class loader_life_support {
-public:
- /// A new patient frame is created when a function is entered
- loader_life_support() {
- get_internals().loader_patient_stack.push_back(nullptr);
- }
-
- /// ... and destroyed after it returns
- ~loader_life_support() {
- auto &stack = get_internals().loader_patient_stack;
- if (stack.empty())
- pybind11_fail("loader_life_support: internal error");
-
- auto ptr = stack.back();
- stack.pop_back();
- Py_CLEAR(ptr);
-
- // A heuristic to reduce the stack's capacity (e.g. after long recursive calls)
- if (stack.capacity() > 16 && stack.size() != 0 && stack.capacity() / stack.size() > 2)
- stack.shrink_to_fit();
- }
-
- /// This can only be used inside a pybind11-bound function, either by `argument_loader`
- /// at argument preparation time or by `py::cast()` at execution time.
- PYBIND11_NOINLINE static void add_patient(handle h) {
- auto &stack = get_internals().loader_patient_stack;
- if (stack.empty())
- throw cast_error("When called outside a bound function, py::cast() cannot "
- "do Python -> C++ conversions which require the creation "
- "of temporary values");
-
- auto &list_ptr = stack.back();
- if (list_ptr == nullptr) {
- list_ptr = PyList_New(1);
- if (!list_ptr)
- pybind11_fail("loader_life_support: error allocating list");
- PyList_SET_ITEM(list_ptr, 0, h.inc_ref().ptr());
- } else {
- auto result = PyList_Append(list_ptr, h.ptr());
- if (result == -1)
- pybind11_fail("loader_life_support: error adding patient");
- }
- }
-};
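-// In practice this is what keeps implicit-conversion temporaries valid: e.g.
-// when a Python int is implicitly converted to a bound C++ type, the freshly
-// created intermediate Python object is registered via add_patient() and only
-// released once the bound function has returned.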
-
-// Gets the cache entry for the given type, creating it if necessary. The return value is the pair
-// returned by emplace, i.e. an iterator for the entry and a bool set to `true` if the entry was
-// just created.
-inline std::pair<decltype(internals::registered_types_py)::iterator, bool> all_type_info_get_cache(PyTypeObject *type);
-
-// Populates a just-created cache entry.
-PYBIND11_NOINLINE inline void all_type_info_populate(PyTypeObject *t, std::vector<type_info *> &bases) {
- std::vector<PyTypeObject *> check;
- for (handle parent : reinterpret_borrow<tuple>(t->tp_bases))
- check.push_back((PyTypeObject *) parent.ptr());
-
- auto const &type_dict = get_internals().registered_types_py;
- for (size_t i = 0; i < check.size(); i++) {
- auto type = check[i];
- // Ignore Python2 old-style class super type:
- if (!PyType_Check((PyObject *) type)) continue;
-
- // Check `type` in the current set of registered python types:
- auto it = type_dict.find(type);
- if (it != type_dict.end()) {
- // We found a cache entry for it, so it's either pybind-registered or has pre-computed
- // pybind bases, but we have to make sure we haven't already seen the type(s) before: we
- // want to follow Python/virtual C++ rules that there should only be one instance of a
- // common base.
- for (auto *tinfo : it->second) {
- // NB: Could use a second set here, rather than doing a linear search, but since
- // having a large number of immediate pybind11-registered types seems fairly
- // unlikely, that probably isn't worthwhile.
- bool found = false;
- for (auto *known : bases) {
- if (known == tinfo) { found = true; break; }
- }
- if (!found) bases.push_back(tinfo);
- }
- }
- else if (type->tp_bases) {
- // It's some python type, so keep following its base classes to look for one or more
- // registered types
- if (i + 1 == check.size()) {
- // When we're at the end, we can pop off the current element to avoid growing
- // `check` when adding just one base (which is typical--i.e. when there is no
- // multiple inheritance)
- check.pop_back();
- i--;
- }
- for (handle parent : reinterpret_borrow<tuple>(type->tp_bases))
- check.push_back((PyTypeObject *) parent.ptr());
- }
- }
-}
-
-/**
- * Extracts vector of type_info pointers of pybind-registered roots of the given Python type. Will
- * be just 1 pybind type for the Python type of a pybind-registered class, or for any Python-side
- * derived class that uses single inheritance. Will contain as many types as required for a Python
- * class that uses multiple inheritance to inherit (directly or indirectly) from multiple
- * pybind-registered classes. Will be empty if neither the type nor any base classes are
- * pybind-registered.
- *
- * The value is cached for the lifetime of the Python type.
- */
-inline const std::vector<detail::type_info *> &all_type_info(PyTypeObject *type) {
- auto ins = all_type_info_get_cache(type);
- if (ins.second)
- // New cache entry: populate it
- all_type_info_populate(type, ins.first->second);
-
- return ins.first->second;
-}
-
-/**
- * Gets a single pybind11 type info for a python type. Returns nullptr if neither the type nor any
- * ancestors are pybind11-registered. Throws an exception if there are multiple bases--use
- * `all_type_info` instead if you want to support multiple bases.
- */
-PYBIND11_NOINLINE inline detail::type_info* get_type_info(PyTypeObject *type) {
- auto &bases = all_type_info(type);
- if (bases.size() == 0)
- return nullptr;
- if (bases.size() > 1)
- pybind11_fail("pybind11::detail::get_type_info: type has multiple pybind11-registered bases");
- return bases.front();
-}
-
-PYBIND11_NOINLINE inline detail::type_info *get_type_info(const std::type_info &tp,
- bool throw_if_missing = false) {
- auto &types = get_internals().registered_types_cpp;
-
- auto it = types.find(std::type_index(tp));
- if (it != types.end())
- return (detail::type_info *) it->second;
- if (throw_if_missing) {
- std::string tname = tp.name();
- detail::clean_type_id(tname);
- pybind11_fail("pybind11::detail::get_type_info: unable to find type info for \"" + tname + "\"");
- }
- return nullptr;
-}
-
-PYBIND11_NOINLINE inline handle get_type_handle(const std::type_info &tp, bool throw_if_missing) {
- detail::type_info *type_info = get_type_info(tp, throw_if_missing);
- return handle(type_info ? ((PyObject *) type_info->type) : nullptr);
-}
-
-struct value_and_holder {
- instance *inst;
- size_t index;
- const detail::type_info *type;
- void **vh;
-
- value_and_holder(instance *i, const detail::type_info *type, size_t vpos, size_t index) :
- inst{i}, index{index}, type{type},
- vh{inst->simple_layout ? inst->simple_value_holder : &inst->nonsimple.values_and_holders[vpos]}
- {}
-
- // Used for past-the-end iterator
- value_and_holder(size_t index) : index{index} {}
-
- template <typename V = void> V *&value_ptr() const {
- return reinterpret_cast<V *&>(vh[0]);
- }
- // True if this `value_and_holder` has a non-null value pointer
- explicit operator bool() const { return value_ptr(); }
-
- template <typename H> H &holder() const {
- return reinterpret_cast<H &>(vh[1]);
- }
- bool holder_constructed() const {
- return inst->simple_layout
- ? inst->simple_holder_constructed
- : inst->nonsimple.status[index] & instance::status_holder_constructed;
- }
- void set_holder_constructed() {
- if (inst->simple_layout)
- inst->simple_holder_constructed = true;
- else
- inst->nonsimple.status[index] |= instance::status_holder_constructed;
- }
- bool instance_registered() const {
- return inst->simple_layout
- ? inst->simple_instance_registered
- : inst->nonsimple.status[index] & instance::status_instance_registered;
- }
- void set_instance_registered() {
- if (inst->simple_layout)
- inst->simple_instance_registered = true;
- else
- inst->nonsimple.status[index] |= instance::status_instance_registered;
- }
-};
-
-// Container for accessing and iterating over an instance's values/holders
-struct values_and_holders {
-private:
- instance *inst;
- using type_vec = std::vector<detail::type_info *>;
- const type_vec &tinfo;
-
-public:
- values_and_holders(instance *inst) : inst{inst}, tinfo(all_type_info(Py_TYPE(inst))) {}
-
- struct iterator {
- private:
- instance *inst;
- const type_vec *types;
- value_and_holder curr;
- friend struct values_and_holders;
- iterator(instance *inst, const type_vec *tinfo)
- : inst{inst}, types{tinfo},
- curr(inst /* instance */,
- types->empty() ? nullptr : (*types)[0] /* type info */,
- 0, /* vpos: (non-simple types only): the first vptr comes first */
- 0 /* index */)
- {}
- // Past-the-end iterator:
- iterator(size_t end) : curr(end) {}
- public:
- bool operator==(const iterator &other) { return curr.index == other.curr.index; }
- bool operator!=(const iterator &other) { return curr.index != other.curr.index; }
- iterator &operator++() {
- if (!inst->simple_layout)
- curr.vh += 1 + (*types)[curr.index]->holder_size_in_ptrs;
- ++curr.index;
- curr.type = curr.index < types->size() ? (*types)[curr.index] : nullptr;
- return *this;
- }
- value_and_holder &operator*() { return curr; }
- value_and_holder *operator->() { return &curr; }
- };
-
- iterator begin() { return iterator(inst, &tinfo); }
- iterator end() { return iterator(tinfo.size()); }
-
- iterator find(const type_info *find_type) {
- auto it = begin(), endit = end();
- while (it != endit && it->type != find_type) ++it;
- return it;
- }
-
- size_t size() { return tinfo.size(); }
-};
-
-/**
- * Extracts C++ value and holder pointer references from an instance (which may contain multiple
- * values/holders for python-side multiple inheritance) that match the given type. Throws an error
- * if the given type (or ValueType, if omitted) is not a pybind11 base of the given instance. If
- * `find_type` is omitted (or explicitly specified as nullptr) the first value/holder are returned,
- * regardless of type (and the resulting .type will be nullptr).
- *
- * The returned object should be short-lived: in particular, it must not outlive the called-upon
- * instance.
- */
-PYBIND11_NOINLINE inline value_and_holder instance::get_value_and_holder(const type_info *find_type /*= nullptr default in common.h*/) {
- // Optimize common case:
- if (!find_type || Py_TYPE(this) == find_type->type)
- return value_and_holder(this, find_type, 0, 0);
-
- detail::values_and_holders vhs(this);
- auto it = vhs.find(find_type);
- if (it != vhs.end())
- return *it;
-
-#if defined(NDEBUG)
- pybind11_fail("pybind11::detail::instance::get_value_and_holder: "
- "type is not a pybind11 base of the given instance "
- "(compile in debug mode for type details)");
-#else
- pybind11_fail("pybind11::detail::instance::get_value_and_holder: `" +
- std::string(find_type->type->tp_name) + "' is not a pybind11 base of the given `" +
- std::string(Py_TYPE(this)->tp_name) + "' instance");
-#endif
-}
-
-PYBIND11_NOINLINE inline void instance::allocate_layout() {
- auto &tinfo = all_type_info(Py_TYPE(this));
-
- const size_t n_types = tinfo.size();
-
- if (n_types == 0)
- pybind11_fail("instance allocation failed: new instance has no pybind11-registered base types");
-
- simple_layout =
- n_types == 1 && tinfo.front()->holder_size_in_ptrs <= instance_simple_holder_in_ptrs();
-
- // Simple path: no python-side multiple inheritance, and a small-enough holder
- if (simple_layout) {
- simple_value_holder[0] = nullptr;
- simple_holder_constructed = false;
- simple_instance_registered = false;
- }
- else { // multiple base types or a too-large holder
- // Allocate space to hold: [v1*][h1][v2*][h2]...[bb...] where [vN*] is a value pointer,
- // [hN] is the (uninitialized) holder instance for value N, and [bb...] is a set of bool
- // values that tracks whether each associated holder has been initialized. Each [block] is
- // padded, if necessary, to an integer multiple of sizeof(void *).
- size_t space = 0;
- for (auto t : tinfo) {
- space += 1; // value pointer
- space += t->holder_size_in_ptrs; // holder instance
- }
- size_t flags_at = space;
- space += size_in_ptrs(n_types); // status bytes (holder_constructed and instance_registered)
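- // Worked example (illustrative): two registered bases whose holders each
- // occupy one pointer give space = (1+1) + (1+1) + size_in_ptrs(2) slots,
- // i.e. two value pointers, two holder blocks, and (on a 64-bit build) one
- // pointer-sized slot holding the two status bytes.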
-
- // Allocate space for flags, values, and holders, and initialize it to 0 (flags and values,
- // in particular, need to be 0). Use Python's memory allocation functions: in Python 3.6
- // they default to using pymalloc, which is designed to be efficient for small allocations
- // like the one we're doing here; in earlier versions (and for larger allocations) they are
- // just wrappers around malloc.
-#if PY_VERSION_HEX >= 0x03050000
- nonsimple.values_and_holders = (void **) PyMem_Calloc(space, sizeof(void *));
- if (!nonsimple.values_and_holders) throw std::bad_alloc();
-#else
- nonsimple.values_and_holders = (void **) PyMem_New(void *, space);
- if (!nonsimple.values_and_holders) throw std::bad_alloc();
- std::memset(nonsimple.values_and_holders, 0, space * sizeof(void *));
-#endif
- nonsimple.status = reinterpret_cast<uint8_t *>(&nonsimple.values_and_holders[flags_at]);
- }
- owned = true;
-}
-
-PYBIND11_NOINLINE inline void instance::deallocate_layout() {
- if (!simple_layout)
- PyMem_Free(nonsimple.values_and_holders);
-}
-
-PYBIND11_NOINLINE inline bool isinstance_generic(handle obj, const std::type_info &tp) {
- handle type = detail::get_type_handle(tp, false);
- if (!type)
- return false;
- return isinstance(obj, type);
-}
-
-PYBIND11_NOINLINE inline std::string error_string() {
- if (!PyErr_Occurred()) {
- PyErr_SetString(PyExc_RuntimeError, "Unknown internal error occurred");
- return "Unknown internal error occurred";
- }
-
- error_scope scope; // Preserve error state
-
- std::string errorString;
- if (scope.type) {
- errorString += handle(scope.type).attr("__name__").cast<std::string>();
- errorString += ": ";
- }
- if (scope.value)
- errorString += (std::string) str(scope.value);
-
- PyErr_NormalizeException(&scope.type, &scope.value, &scope.trace);
-
-#if PY_MAJOR_VERSION >= 3
- if (scope.trace != nullptr)
- PyException_SetTraceback(scope.value, scope.trace);
-#endif
-
-#if !defined(PYPY_VERSION)
- if (scope.trace) {
- PyTracebackObject *trace = (PyTracebackObject *) scope.trace;
-
- /* Get the deepest trace possible */
- while (trace->tb_next)
- trace = trace->tb_next;
-
- PyFrameObject *frame = trace->tb_frame;
- errorString += "\n\nAt:\n";
- while (frame) {
- int lineno = PyFrame_GetLineNumber(frame);
- errorString +=
- " " + handle(frame->f_code->co_filename).cast<std::string>() +
- "(" + std::to_string(lineno) + "): " +
- handle(frame->f_code->co_name).cast<std::string>() + "\n";
- frame = frame->f_back;
- }
- trace = trace->tb_next;
- }
-#endif
-
- return errorString;
-}
-
-PYBIND11_NOINLINE inline handle get_object_handle(const void *ptr, const detail::type_info *type ) {
- auto &instances = get_internals().registered_instances;
- auto range = instances.equal_range(ptr);
- for (auto it = range.first; it != range.second; ++it) {
- for (auto vh : values_and_holders(it->second)) {
- if (vh.type == type)
- return handle((PyObject *) it->second);
- }
- }
- return handle();
-}
-
-inline PyThreadState *get_thread_state_unchecked() {
-#if defined(PYPY_VERSION)
- return PyThreadState_GET();
-#elif PY_VERSION_HEX < 0x03000000
- return _PyThreadState_Current;
-#elif PY_VERSION_HEX < 0x03050000
- return (PyThreadState*) _Py_atomic_load_relaxed(&_PyThreadState_Current);
-#elif PY_VERSION_HEX < 0x03050200
- return (PyThreadState*) _PyThreadState_Current.value;
-#else
- return _PyThreadState_UncheckedGet();
-#endif
-}
-
-// Forward declarations
-inline void keep_alive_impl(handle nurse, handle patient);
-inline PyObject *make_new_instance(PyTypeObject *type, bool allocate_value = true);
-
-class type_caster_generic {
-public:
- PYBIND11_NOINLINE type_caster_generic(const std::type_info &type_info)
- : typeinfo(get_type_info(type_info)) { }
-
- bool load(handle src, bool convert) {
- return load_impl<type_caster_generic>(src, convert);
- }
-
- PYBIND11_NOINLINE static handle cast(const void *_src, return_value_policy policy, handle parent,
- const detail::type_info *tinfo,
- void *(*copy_constructor)(const void *),
- void *(*move_constructor)(const void *),
- const void *existing_holder = nullptr) {
- if (!tinfo) // no type info: error will be set already
- return handle();
-
- void *src = const_cast<void *>(_src);
- if (src == nullptr)
- return none().release();
-
- auto it_instances = get_internals().registered_instances.equal_range(src);
- for (auto it_i = it_instances.first; it_i != it_instances.second; ++it_i) {
- for (auto instance_type : detail::all_type_info(Py_TYPE(it_i->second))) {
- if (instance_type && instance_type == tinfo)
- return handle((PyObject *) it_i->second).inc_ref();
- }
- }
-
- auto inst = reinterpret_steal<object>