diff --git a/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml b/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml
index 0f08909add17d8c73ad6e1b00e17d4c351def7e5..ab484a44833a405513d7f2b4079a4da4c2e403c8 100644
--- a/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml
+++ b/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml
@@ -141,6 +141,7 @@ Train:
img_mode: BGR
channel_first: False
- DetLabelEncode: # Class handling label
+ - CopyPaste:
- IaaAugment:
augmenter_args:
- { 'type': Fliplr, 'args': { 'p': 0.5 } }
diff --git a/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_distill.yml b/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_distill.yml
index 1159d71bf94c330e26c3009b38c5c2b4a9c96f52..46daeeb86d004772a6fb964d602369dcd53b3a01 100644
--- a/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_distill.yml
+++ b/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_distill.yml
@@ -68,8 +68,7 @@ Loss:
ohem_ratio: 3
- DistillationDBLoss:
weight: 1.0
- model_name_list: ["Student", "Teacher"]
- # key: maps
+ model_name_list: ["Student"]
name: DBLoss
balance_loss: true
main_loss_type: DiceLoss
@@ -116,6 +115,7 @@ Train:
img_mode: BGR
channel_first: False
- DetLabelEncode: # Class handling label
+ - CopyPaste:
- IaaAugment:
augmenter_args:
- { 'type': Fliplr, 'args': { 'p': 0.5 } }
diff --git a/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_dml.yml b/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_dml.yml
index 7fe2d2e1a065b54d0e2479475f5f67ac5e38a166..bfbc3b6268cf521acb035be33ced9141046fc430 100644
--- a/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_dml.yml
+++ b/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_dml.yml
@@ -118,6 +118,7 @@ Train:
img_mode: BGR
channel_first: False
- DetLabelEncode: # Class handling label
+ - CopyPaste:
- IaaAugment:
augmenter_args:
- { 'type': Fliplr, 'args': { 'p': 0.5 } }
diff --git a/configs/e2e/e2e_r50_vd_pg.yml b/configs/e2e/e2e_r50_vd_pg.yml
index 4a6e19f4461c7236f3a9a5253437eff97fa72f67..c4c5226e796a42db723ce78ef65473e357c25dc6 100644
--- a/configs/e2e/e2e_r50_vd_pg.yml
+++ b/configs/e2e/e2e_r50_vd_pg.yml
@@ -94,7 +94,7 @@ Eval:
label_file_list: [./train_data/total_text/test/test.txt]
transforms:
- DecodeImage: # load image
- img_mode: RGB
+ img_mode: BGR
channel_first: False
- E2ELabelEncodeTest:
- E2EResizeForTest:
@@ -111,4 +111,4 @@ Eval:
shuffle: False
drop_last: False
batch_size_per_card: 1 # must be 1
- num_workers: 2
\ No newline at end of file
+ num_workers: 2
diff --git a/ppocr/losses/rec_sar_loss.py b/ppocr/losses/rec_sar_loss.py
index 9e1c6495fb5ffa274662a3d1031b174d7297ee30..c8bd8bb0ca395fa4658e57b8dcac52a3e94aadce 100644
--- a/ppocr/losses/rec_sar_loss.py
+++ b/ppocr/losses/rec_sar_loss.py
@@ -9,11 +9,14 @@ from paddle import nn
class SARLoss(nn.Layer):
def __init__(self, **kwargs):
super(SARLoss, self).__init__()
- self.loss_func = paddle.nn.loss.CrossEntropyLoss(reduction="mean", ignore_index=96)
+ self.loss_func = paddle.nn.loss.CrossEntropyLoss(
+ reduction="mean", ignore_index=92)
def forward(self, predicts, batch):
- predict = predicts[:, :-1, :] # ignore last index of outputs to be in same seq_len with targets
- label = batch[1].astype("int64")[:, 1:] # ignore first index of target in loss calculation
+ predict = predicts[:, :
+ -1, :] # ignore last index of outputs to be in same seq_len with targets
+ label = batch[1].astype(
+ "int64")[:, 1:] # ignore first index of target in loss calculation
batch_size, num_steps, num_classes = predict.shape[0], predict.shape[
1], predict.shape[2]
assert len(label.shape) == len(list(predict.shape)) - 1, \
diff --git a/ppocr/modeling/necks/rnn.py b/ppocr/modeling/necks/rnn.py
index de87b3d9895168657f8c9722177c026b992c2966..86e649028f8fbb76cb5a1fd85381bd361277c6ee 100644
--- a/ppocr/modeling/necks/rnn.py
+++ b/ppocr/modeling/necks/rnn.py
@@ -51,7 +51,7 @@ class EncoderWithFC(nn.Layer):
super(EncoderWithFC, self).__init__()
self.out_channels = hidden_size
weight_attr, bias_attr = get_para_bias_attr(
- l2_decay=0.00001, k=in_channels, name='reduce_encoder_fea')
+ l2_decay=0.00001, k=in_channels)
self.fc = nn.Linear(
in_channels,
hidden_size,
diff --git a/ppocr/utils/dict90.txt b/ppocr/utils/dict90.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a945ae9c526e4faa68852eb3fb47d078a2f3f6ce
--- /dev/null
+++ b/ppocr/utils/dict90.txt
@@ -0,0 +1,90 @@
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+!
+"
+#
+$
+%
+&
+'
+(
+)
+*
++
+,
+-
+.
+/
+:
+;
+<
+=
+>
+?
+@
+[
+\
+]
+_
+`
+~
\ No newline at end of file
diff --git a/ppstructure/layout/train_layoutparser_model.md b/ppstructure/layout/train_layoutparser_model.md
index 08f5ebbf1aa276e4a3ecf27af46442161afcda1f..58975d71606e45b2f68a7f68565459042ef32775 100644
--- a/ppstructure/layout/train_layoutparser_model.md
+++ b/ppstructure/layout/train_layoutparser_model.md
@@ -4,9 +4,9 @@
[1.1 Requirements](#Requirements)
- [1.2 Install PaddleDetection](#Install PaddleDetection)
+ [1.2 Install PaddleDetection](#Install_PaddleDetection)
-[2. Data preparation](#Data preparation)
+[2. Data preparation](#Data_preparation)
[3. Configuration](#Configuration)
@@ -16,7 +16,7 @@
[6. Deployment](#Deployment)
- [6.1 Export model](#Export model)
+ [6.1 Export model](#Export_model)
[6.2 Inference](#Inference)
@@ -35,7 +35,7 @@
- CUDA >= 10.1
- cuDNN >= 7.6
-
+
### 1.2 Install PaddleDetection
@@ -51,7 +51,7 @@ pip install -r requirements.txt
For more installation tutorials, please refer to: [Install doc](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/docs/tutorials/INSTALL_cn.md)
-
+
## 2. Data preparation
@@ -165,7 +165,7 @@ python tools/infer.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --infer
Use your trained model in Layout Parser
-
+
### 6.1 Export model
diff --git a/tools/infer/predict_e2e.py b/tools/infer/predict_e2e.py
index 8ff279d7437965f725082a9eb1c83e05a7ffc8a8..5029d6059346a00062418d8d1b6cb029b0110643 100755
--- a/tools/infer/predict_e2e.py
+++ b/tools/infer/predict_e2e.py
@@ -141,7 +141,6 @@ if __name__ == "__main__":
img, flag = check_and_read_gif(image_file)
if not flag:
img = cv2.imread(image_file)
- img = img[:, :, ::-1]
if img is None:
logger.info("error in loading image:{}".format(image_file))
continue