Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
weixin_41840029
PaddleOCR
提交
97a3af3b
P
PaddleOCR
项目概览
weixin_41840029
/
PaddleOCR
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleOCR
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
97a3af3b
编写于
7月 06, 2020
作者:
T
tink2123
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add distort and space
上级
5067126e
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
269 addition
and
6 deletion
+269
-6
configs/rec/rec_chinese_lite_train.yml
configs/rec/rec_chinese_lite_train.yml
+1
-0
configs/rec/rec_icdar15_train.yml
configs/rec/rec_icdar15_train.yml
+1
-0
ppocr/data/rec/dataset_traversal.py
ppocr/data/rec/dataset_traversal.py
+14
-4
ppocr/data/rec/img_tools.py
ppocr/data/rec/img_tools.py
+251
-1
ppocr/utils/ppocr_keys_v1.txt
ppocr/utils/ppocr_keys_v1.txt
+2
-1
未找到文件。
configs/rec/rec_chinese_lite_train.yml
浏览文件 @
97a3af3b
...
@@ -14,6 +14,7 @@ Global:
...
@@ -14,6 +14,7 @@ Global:
character_type
:
ch
character_type
:
ch
character_dict_path
:
./ppocr/utils/ppocr_keys_v1.txt
character_dict_path
:
./ppocr/utils/ppocr_keys_v1.txt
loss_type
:
ctc
loss_type
:
ctc
distort
:
true
reader_yml
:
./configs/rec/rec_chinese_reader.yml
reader_yml
:
./configs/rec/rec_chinese_reader.yml
pretrain_weights
:
pretrain_weights
:
checkpoints
:
checkpoints
:
...
...
configs/rec/rec_icdar15_train.yml
浏览文件 @
97a3af3b
...
@@ -13,6 +13,7 @@ Global:
...
@@ -13,6 +13,7 @@ Global:
max_text_length
:
25
max_text_length
:
25
character_type
:
en
character_type
:
en
loss_type
:
ctc
loss_type
:
ctc
distort
:
true
reader_yml
:
./configs/rec/rec_icdar15_reader.yml
reader_yml
:
./configs/rec/rec_icdar15_reader.yml
pretrain_weights
:
./pretrain_models/rec_mv3_none_bilstm_ctc/best_accuracy
pretrain_weights
:
./pretrain_models/rec_mv3_none_bilstm_ctc/best_accuracy
checkpoints
:
checkpoints
:
...
...
ppocr/data/rec/dataset_traversal.py
浏览文件 @
97a3af3b
...
@@ -45,6 +45,8 @@ class LMDBReader(object):
...
@@ -45,6 +45,8 @@ class LMDBReader(object):
self
.
use_tps
=
False
self
.
use_tps
=
False
if
"tps"
in
params
:
if
"tps"
in
params
:
self
.
ues_tps
=
True
self
.
ues_tps
=
True
if
"distort"
in
params
:
self
.
use_distort
=
params
[
'distort'
]
if
params
[
'mode'
]
==
'train'
:
if
params
[
'mode'
]
==
'train'
:
self
.
batch_size
=
params
[
'train_batch_size_per_card'
]
self
.
batch_size
=
params
[
'train_batch_size_per_card'
]
self
.
drop_last
=
True
self
.
drop_last
=
True
...
@@ -142,7 +144,8 @@ class LMDBReader(object):
...
@@ -142,7 +144,8 @@ class LMDBReader(object):
label
=
label
,
label
=
label
,
char_ops
=
self
.
char_ops
,
char_ops
=
self
.
char_ops
,
loss_type
=
self
.
loss_type
,
loss_type
=
self
.
loss_type
,
max_text_length
=
self
.
max_text_length
)
max_text_length
=
self
.
max_text_length
,
distort
=
self
.
use_distort
)
if
outs
is
None
:
if
outs
is
None
:
continue
continue
yield
outs
yield
outs
...
@@ -185,6 +188,8 @@ class SimpleReader(object):
...
@@ -185,6 +188,8 @@ class SimpleReader(object):
self
.
use_tps
=
False
self
.
use_tps
=
False
if
"tps"
in
params
:
if
"tps"
in
params
:
self
.
use_tps
=
True
self
.
use_tps
=
True
if
"distort"
in
params
:
self
.
use_distort
=
params
[
'distort'
]
if
params
[
'mode'
]
==
'train'
:
if
params
[
'mode'
]
==
'train'
:
self
.
batch_size
=
params
[
'train_batch_size_per_card'
]
self
.
batch_size
=
params
[
'train_batch_size_per_card'
]
self
.
drop_last
=
True
self
.
drop_last
=
True
...
@@ -232,9 +237,14 @@ class SimpleReader(object):
...
@@ -232,9 +237,14 @@ class SimpleReader(object):
img
=
cv2
.
cvtColor
(
img
,
cv2
.
COLOR_GRAY2BGR
)
img
=
cv2
.
cvtColor
(
img
,
cv2
.
COLOR_GRAY2BGR
)
label
=
substr
[
1
]
label
=
substr
[
1
]
outs
=
process_image
(
img
,
self
.
image_shape
,
label
,
outs
=
process_image
(
self
.
char_ops
,
self
.
loss_type
,
img
=
img
,
self
.
max_text_length
)
image_shape
=
self
.
image_shape
,
label
=
label
,
char_ops
=
self
.
char_ops
,
loss_type
=
self
.
loss_type
,
max_text_length
=
self
.
max_text_length
,
distort
=
self
.
use_distort
)
if
outs
is
None
:
if
outs
is
None
:
continue
continue
yield
outs
yield
outs
...
...
ppocr/data/rec/img_tools.py
浏览文件 @
97a3af3b
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
import
math
import
math
import
cv2
import
cv2
import
numpy
as
np
import
numpy
as
np
import
random
from
ppocr.utils.utility
import
initial_logger
from
ppocr.utils.utility
import
initial_logger
logger
=
initial_logger
()
logger
=
initial_logger
()
...
@@ -89,6 +90,252 @@ def get_img_data(value):
...
@@ -89,6 +90,252 @@ def get_img_data(value):
return
imgori
return
imgori
def flag():
    """
    Pick a random sign.

    Returns:
        1 when a fresh random.random() draw exceeds 0.5000001, otherwise -1.
    """
    if random.random() > 0.5000001:
        return 1
    return -1
def cvtColor(img):
    """
    Randomly nudge the brightness of an image.

    The image is converted with cv2.COLOR_BGR2HSV, channel index 2 is
    multiplied by (1 + delta) where delta is a random value of magnitude
    below 0.001 with a random sign, and the result is converted back with
    cv2.COLOR_HSV2BGR.

    Args:
        img: image accepted by cv2.cvtColor with COLOR_BGR2HSV.

    Returns:
        The converted-back image.
    """
    hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    # Draw order is kept for reproducibility: magnitude first, then sign.
    scale = 1 + 0.001 * random.random() * flag()
    value_channel = hsv_img[:, :, 2]
    hsv_img[:, :, 2] = value_channel * scale
    return cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR)
def blur(img):
    """
    Apply a 5x5 Gaussian blur (sigma 1) to images larger than 10x10.

    Args:
        img: array whose shape unpacks into exactly three values
            (first two are compared against 10).

    Returns:
        The blurred image, or the input object itself when either of the
        first two dimensions is 10 or less.
    """
    rows, cols, _ = img.shape
    too_small = rows <= 10 or cols <= 10
    if too_small:
        return img
    return cv2.GaussianBlur(img, (5, 5), 1)
def doudong(img):
    """
    Apply a small diagonal shift ("jitter") to an image, in place.

    A shift count s = int(random.random() * min(dim0, dim1) * 0.01) is drawn;
    for every offset in range(s) the region img[offset:, offset:] is
    overwritten with the top-left region of an untouched copy of the input.
    Images whose first two dimensions are not both greater than 10 are
    returned untouched, and no random number is drawn for them.

    Args:
        img: array whose shape unpacks into exactly three values.

    Returns:
        The same array object that was passed in (modified in place).
    """
    rows, cols, _ = img.shape
    if not (cols > 10 and rows > 10):
        return img

    shift_count = int(random.random() * min(rows, cols) * 0.01)
    snapshot = img.copy()
    for offset in range(shift_count):
        img[offset:, offset:, :] = snapshot[:rows - offset, :cols - offset, :]
    return img
def add_gasuss_noise(image, mean=0, var=0.1):
    """
    Add Gaussian noise to an image and return it as uint8.

    Noise is sampled from np.random.normal(mean, sqrt(var)) with the shape of
    the image, scaled by 0.5, added to the image, clipped to [0, 255] and
    converted with np.uint8.

    Args:
        image: array to perturb.
        mean: mean of the normal distribution.
        var: variance of the normal distribution.

    Returns:
        A new uint8 array with the same shape as image.
    """
    sigma = var**0.5
    noisy = image + 0.5 * np.random.normal(mean, sigma, image.shape)
    return np.uint8(np.clip(noisy, 0, 255))
def get_crop(image):
    """
    Randomly trim between 1 and 8 rows from the top or the bottom of an image.

    Two random draws are made, in this order: the number of rows to remove
    (random.randint(1, 8)) and which side to remove them from
    (random.randint(0, 1); truthy means the top).

    The number of removed rows is capped at height - 1 so that the result
    always keeps at least one row. Without the cap, an image whose height is
    not larger than the drawn crop would come back empty (top crop) or
    sliced through a negative index (bottom crop).

    Args:
        image: array whose first axis is the one being cropped.

    Returns:
        A cropped copy of image; the input is not modified.
    """
    h = image.shape[0]
    top_min, top_max = 1, 8
    top_crop = random.randint(top_min, top_max)
    # Never remove every row of a short image.
    top_crop = min(top_crop, max(h - 1, 0))

    from_top = random.randint(0, 1)
    if from_top:
        return image[top_crop:h].copy()
    return image[0:h - top_crop].copy()
class Config:
    """
    Holder for the random parameters and on/off switches read by warp(),
    get_warpR() and get_warpAffine().
    """

    def __init__(self):
        # Fixed settings.
        self.fov = 42
        self.r = 0
        self.borderMode = cv2.BORDER_REPLICATE
        # Random settings; the draw order (anglex, angley, anglez, shearx,
        # sheary) is kept so a seeded generator yields the same values.
        self.anglex = random.random() * 30
        self.angley = random.random() * 15
        self.anglez = random.random() * 10
        self.shearx = random.random() * 0.3
        self.sheary = random.random() * 0.05

    def make(self, w, h, ang):
        """
        Re-draw the rotation angles for an image of size (w, h) and set the
        switches that select which augmentations warp() may apply.

        Args:
            w: stored as self.w.
            h: stored as self.h.
            ang: upper bound (after int()) on the magnitude of anglez.
        """
        # Image size.
        self.w = w
        self.h = h

        # Each angle consumes one random.random() draw followed by one
        # flag() call; the order anglex, angley, anglez is preserved.
        self.anglex = random.random() * 5 * flag()
        self.angley = random.random() * 5 * flag()
        self.anglez = -1 * random.random() * int(ang) * flag()

        # Fixed settings.
        self.fov = 42
        self.r = 0
        self.shearx = 0
        self.sheary = 0
        self.borderMode = cv2.BORDER_REPLICATE

        # Augmentation switches.
        self.perspective = True
        self.crop = True
        self.affine = False
        self.reverse = True
        self.noise = True
        self.dou = False
        self.blur = True
        self.color = True
def rad(x):
    """
    Convert an angle from degrees to radians (x * pi / 180).
    """
    scaled = x * np.pi
    return scaled / 180
def get_warpR(config):
    """
    Build a perspective-warp matrix from the rotation settings in config.

    The four image corners are rotated by rx.ry.rz, projected back onto the
    image plane, and cv2.getPerspectiveTransform maps the original corners
    onto the projected ones. The transform is then shifted so the projected
    bounding box starts at the origin and scaled by `ratio`.

    Args:
        config: object exposing anglex, angley, anglez and fov (passed
            through rad(), i.e. treated as degrees) and the image size w, h.

    Returns:
        A 4-tuple (ret, (-r1, -c1), ratio, dst):
            ret: 3x3 matrix to pass to cv2.warpPerspective; the identity
                matrix when the projected bounding box has zero height or
                width.
            (-r1, -c1): negated integer offsets of the projected bounding
                box (taken from dst[:, 1] and dst[:, 0] respectively).
            ratio: scale factor folded into ret (1.0 in the fallback case).
            dst: 4x2 float32 array of the projected corner coordinates.
    """
    anglex, angley, anglez = config.anglex, config.angley, config.anglez
    fov, w, h = config.fov, config.w, config.h
    if 69 < w < 112:
        anglex = anglex * 1.5

    # Half the image diagonal divided by tan(fov / 2).
    z = np.sqrt(w**2 + h**2) / 2 / np.tan(rad(fov / 2))

    # Homogeneous coordinate transformation matrices.
    # NOTE(review): both off-diagonal terms of rx are -sin, so rx is not a
    # pure rotation matrix. Left unchanged because it defines the current
    # augmentation behaviour - confirm whether this is intended.
    rx = np.array([[1, 0, 0, 0],
                   [0, np.cos(rad(anglex)), -np.sin(rad(anglex)), 0],
                   [0, -np.sin(rad(anglex)), np.cos(rad(anglex)), 0],
                   [0, 0, 0, 1]], np.float32)
    ry = np.array([[np.cos(rad(angley)), 0, np.sin(rad(angley)), 0],
                   [0, 1, 0, 0],
                   [-np.sin(rad(angley)), 0, np.cos(rad(angley)), 0],
                   [0, 0, 0, 1]], np.float32)
    rz = np.array([[np.cos(rad(anglez)), np.sin(rad(anglez)), 0, 0],
                   [-np.sin(rad(anglez)), np.cos(rad(anglez)), 0, 0],
                   [0, 0, 1, 0],
                   [0, 0, 0, 1]], np.float32)
    rotation = rx.dot(ry).dot(rz)

    # Generate the 4 corner points relative to pcenter.
    # NOTE(review): pcenter is (h / 2, w / 2) while the corners are written
    # as (w, h); this looks swapped but is kept as-is - confirm.
    pcenter = np.array([h / 2, w / 2, 0, 0], np.float32)
    p1 = np.array([0, 0, 0, 0], np.float32) - pcenter
    p2 = np.array([w, 0, 0, 0], np.float32) - pcenter
    p3 = np.array([0, h, 0, 0], np.float32) - pcenter
    p4 = np.array([w, h, 0, 0], np.float32) - pcenter
    list_dst = [rotation.dot(p) for p in (p1, p2, p3, p4)]

    org = np.array([[0, 0], [w, 0], [0, h], [w, h]], np.float32)
    dst = np.zeros((4, 2), np.float32)
    # Project onto the image plane.
    for i, point in enumerate(list_dst):
        dst[i, 0] = point[0] * z / (z - point[2]) + pcenter[0]
        dst[i, 1] = point[1] * z / (z - point[2]) + pcenter[1]

    warpR = cv2.getPerspectiveTransform(org, dst)

    dst1, dst2, dst3, dst4 = dst
    r1 = int(min(dst1[1], dst2[1]))
    r2 = int(max(dst3[1], dst4[1]))
    c1 = int(min(dst1[0], dst3[0]))
    c2 = int(max(dst2[0], dst4[0]))

    try:
        ratio = min(1.0 * h / (r2 - r1), 1.0 * w / (c2 - c1))
        dx = -c1
        dy = -r1
        T1 = np.float32([[1., 0, dx], [0, 1., dy], [0, 0, 1.0 / ratio]])
        ret = T1.dot(warpR)
    except ZeroDivisionError:
        # Degenerate projection (zero-height/width box or zero ratio):
        # fall back to the identity transform instead of failing.
        ratio = 1.0
        ret = np.float32([[1., 0, 0], [0, 1., 0], [0, 0, 1.]])
    return ret, (-r1, -c1), ratio, dst
def get_warpAffine(config):
    """
    Build the 2x3 float32 matrix for an in-plane rotation by config.anglez.

    Args:
        config: object exposing anglez (passed through rad(), i.e. treated
            as degrees).

    Returns:
        np.float32 array [[cos, sin, 0], [-sin, cos, 0]].
    """
    theta = rad(config.anglez)
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    first_row = [cos_t, sin_t, 0]
    second_row = [-sin_t, cos_t, 0]
    return np.array([first_row, second_row], np.float32)
def warp(img, ang):
    """
    Apply a random chain of augmentations to an image.

    A Config is created and configured via make(w, h, ang). Each enabled
    step then runs with a chance decided by random.randint(1, 100) >= 50,
    in this order: perspective warp, crop, affine warp (no random gate),
    blur, colour change, jitter (no random gate), noise, and inversion
    (255 - image).

    Args:
        img: array whose shape unpacks into exactly three values.
        ang: forwarded to Config.make as the bound for the z rotation.

    Returns:
        The augmented image; the input object itself if no step fired.
    """

    def _coin_flip():
        # One random.randint(1, 100) draw per call, as in each gated step.
        return random.randint(1, 100) >= 50

    h, w, _ = img.shape
    config = Config()
    config.make(w, h, ang)
    result = img

    if config.perspective and _coin_flip():
        warpR, _, ratio, dst = get_warpR(config)
        xs = dst[:, 0]
        new_w = int(np.max(xs)) - int(np.min(xs))
        out_size = (int(new_w * ratio), h)
        result = cv2.warpPerspective(
            result, warpR, out_size, borderMode=config.borderMode)

    if config.crop:
        # The draw happens before the size check; the size check uses the
        # dimensions of the original input image.
        fire = _coin_flip()
        if fire and h >= 20 and w >= 20:
            result = get_crop(result)

    if config.affine:
        warpT = get_warpAffine(config)
        result = cv2.warpAffine(
            result, warpT, (w, h), borderMode=config.borderMode)

    if config.blur and _coin_flip():
        result = blur(result)

    if config.color and _coin_flip():
        result = cvtColor(result)

    if config.dou:
        result = doudong(result)

    if config.noise and _coin_flip():
        result = add_gasuss_noise(result)

    if config.reverse and _coin_flip():
        result = 255 - result

    return result
def
process_image
(
img
,
def
process_image
(
img
,
image_shape
,
image_shape
,
label
=
None
,
label
=
None
,
...
@@ -96,7 +343,10 @@ def process_image(img,
...
@@ -96,7 +343,10 @@ def process_image(img,
loss_type
=
None
,
loss_type
=
None
,
max_text_length
=
None
,
max_text_length
=
None
,
tps
=
None
,
tps
=
None
,
infer_mode
=
False
):
infer_mode
=
False
,
distort
=
False
):
if
distort
:
img
=
warp
(
img
,
10
)
if
infer_mode
and
char_ops
.
character_type
==
"ch"
and
not
tps
:
if
infer_mode
and
char_ops
.
character_type
==
"ch"
and
not
tps
:
norm_img
=
resize_norm_img_chinese
(
img
,
image_shape
)
norm_img
=
resize_norm_img_chinese
(
img
,
image_shape
)
else
:
else
:
...
...
ppocr/utils/ppocr_keys_v1.txt
浏览文件 @
97a3af3b
...
@@ -6620,4 +6620,5 @@ j
...
@@ -6620,4 +6620,5 @@ j
緖
緖
續
續
紹
紹
懮
懮
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录