Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleOCR
提交
bdefa140
P
PaddleOCR
项目概览
PaddlePaddle
/
PaddleOCR
大约 1 年 前同步成功
通知
1528
Star
32962
Fork
6643
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
108
列表
看板
标记
里程碑
合并请求
7
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
108
Issue
108
列表
看板
标记
里程碑
合并请求
7
合并请求
7
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
bdefa140
编写于
6月 16, 2022
作者:
M
MissPenguin
提交者:
GitHub
6月 16, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #6552 from an1018/pcb_branch
add pcb
上级
10a3f257
44086577
变更
6
展开全部
隐藏空白更改
内联
并排
Showing
6 changed file
with
292 addition
and
0 deletion
+292
-0
applications/PCB字符识别/PCB字符识别.md
applications/PCB字符识别/PCB字符识别.md
+0
-0
applications/PCB字符识别/gen_data/background/bg.jpg
applications/PCB字符识别/gen_data/background/bg.jpg
+0
-0
applications/PCB字符识别/gen_data/corpus/text.txt
applications/PCB字符识别/gen_data/corpus/text.txt
+31
-0
applications/PCB字符识别/gen_data/det_background/1.png
applications/PCB字符识别/gen_data/det_background/1.png
+0
-0
applications/PCB字符识别/gen_data/det_background/2.png
applications/PCB字符识别/gen_data/det_background/2.png
+0
-0
applications/PCB字符识别/gen_data/gen.py
applications/PCB字符识别/gen_data/gen.py
+261
-0
未找到文件。
applications/PCB字符识别/PCB字符识别.md
0 → 100644
浏览文件 @
bdefa140
此差异已折叠。
点击以展开。
applications/PCB字符识别/gen_data/background/bg.jpg
0 → 100644
浏览文件 @
bdefa140
2.0 KB
applications/PCB字符识别/gen_data/corpus/text.txt
0 → 100644
浏览文件 @
bdefa140
5ZQ
I4UL
PWL
SNOG
ZL02
1C30
O3H
YHRS
N03S
1U5Y
JTK
EN4F
YKJ
DWNH
R42W
X0V
4OF5
08AM
Y93S
GWE2
0KR
9U2A
DBQ
Y6J
ROZ
K06
KIEY
NZQJ
UN1B
6X4
\ No newline at end of file
applications/PCB字符识别/gen_data/det_background/1.png
0 → 100644
浏览文件 @
bdefa140
145 字节
applications/PCB字符识别/gen_data/det_background/2.png
0 → 100644
浏览文件 @
bdefa140
141 字节
applications/PCB字符识别/gen_data/gen.py
0 → 100644
浏览文件 @
bdefa140
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/zcswdt/Color_OCR_image_generator
"""
import
os
import
random
from
PIL
import
Image
,
ImageDraw
,
ImageFont
import
json
import
argparse
def get_char_lines(txt_root_path):
    """
    desc: get corpus lines

    Read every file directly under ``txt_root_path`` as UTF-8 text and
    collect its lines with the surrounding whitespace stripped.

    Args:
        txt_root_path: directory that contains the corpus text files.

    Returns:
        list of str, one entry per non-blank corpus line. Files are visited
        in sorted name order so the result is reproducible between runs.
    """
    char_lines = []
    # os.listdir() returns entries in arbitrary order; sort them so the
    # corpus order does not depend on the file system
    for txt in sorted(os.listdir(txt_root_path)):
        txt_path = os.path.join(txt_root_path, txt)
        # the context manager closes the file even if reading raises
        with open(txt_path, mode='r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                # a blank line has no characters to render, so it cannot
                # be used as the text of a generated picture
                if line:
                    char_lines.append(line)
    return char_lines
def get_horizontal_text_picture(image_file, chars, fonts_list, cf):
    """
    desc: gen horizontal text picture

    Draw ``chars`` left to right, in black, at a random position of the
    background image and return the region that contains the text.

    Args:
        image_file: path of the background image.
        chars: the text to draw; must not be empty.
        fonts_list: list of font file paths, one is picked at random.
        cf: object providing the ``font_min_size`` and ``font_max_size``
            attributes (the parsed command line arguments).

    Returns:
        tuple ``(crop_img, chars)``: the cropped PIL image and the text
        that was drawn on it.

    Raises:
        ValueError: if ``chars`` is empty or the rendered text is larger
            than the background image.
    """
    if not chars:
        raise ValueError('chars must contain at least one character')

    # convert() returns a new image, so the background file can be closed
    # as soon as the pixel data has been copied
    with Image.open(image_file) as bg_img:
        img = bg_img.convert('RGB')
    img_w, img_h = img.size

    # random choice font
    font_path = random.choice(fonts_list)
    # random choice font size
    font_size = random.randint(cf.font_min_size, cf.font_max_size)
    font = ImageFont.truetype(font_path, font_size)

    # FreeTypeFont.getsize() was removed in Pillow 10; prefer getbbox()
    # and fall back to getsize() on old Pillow releases without it
    use_bbox = hasattr(font, 'getbbox')
    ch_w = []
    ch_h = []
    for ch in chars:
        if use_bbox:
            left, _, right, bottom = font.getbbox(ch)
            # same extent getsize() reported: the glyph width, and the
            # height including the vertical offset from the drawing origin
            wt, ht = right - left, bottom
        else:
            wt, ht = font.getsize(ch)
        ch_w.append(wt)
        ch_h.append(ht)

    # add space: one widest-character gap between neighbouring characters
    char_space_width = max(ch_w)
    f_w = sum(ch_w) + char_space_width * (len(chars) - 1)
    f_h = max(ch_h)

    if f_w > img_w or f_h > img_h:
        raise ValueError(
            'text %r (%sx%s) does not fit into background %s (%sx%s)' %
            (chars, f_w, f_h, image_file, img_w, img_h))

    # random top-left corner that keeps the whole text inside the image
    x1 = random.randint(0, img_w - f_w)
    y1 = random.randint(0, img_h - f_h)
    crop_box = (x1, y1, x1 + f_w, y1 + f_h)

    best_color = (0, 0, 0)
    draw = ImageDraw.Draw(img)
    pen_x = x1
    for ch, wt in zip(chars, ch_w):
        draw.text((pen_x, y1), ch, best_color, font=font)
        pen_x += wt + char_space_width

    crop_img = img.crop(crop_box)
    return crop_img, chars
def get_vertical_text_picture(image_file, chars, fonts_list, cf):
    """
    desc: gen vertical text picture

    Draw ``chars`` top to bottom, in black, at a random position of the
    background image, crop the region that contains the text and rotate
    the crop with ``Image.ROTATE_90``.

    Args:
        image_file: path of the background image.
        chars: the text to draw; must not be empty.
        fonts_list: list of font file paths, one is picked at random.
        cf: object providing the ``font_min_size`` and ``font_max_size``
            attributes (the parsed command line arguments).

    Returns:
        tuple ``(crop_img, chars)``: the cropped and rotated PIL image and
        the text that was drawn on it.

    Raises:
        ValueError: if ``chars`` is empty or the rendered text is larger
            than the background image.
    """
    if not chars:
        raise ValueError('chars must contain at least one character')

    # convert() returns a new image, so the background file can be closed
    # as soon as the pixel data has been copied
    with Image.open(image_file) as bg_img:
        img = bg_img.convert('RGB')
    img_w, img_h = img.size

    # random choice font
    font_path = random.choice(fonts_list)
    # random choice font size
    font_size = random.randint(cf.font_min_size, cf.font_max_size)
    font = ImageFont.truetype(font_path, font_size)

    # FreeTypeFont.getsize() was removed in Pillow 10; prefer getbbox()
    # and fall back to getsize() on old Pillow releases without it
    use_bbox = hasattr(font, 'getbbox')
    ch_w = []
    ch_h = []
    for ch in chars:
        if use_bbox:
            left, _, right, bottom = font.getbbox(ch)
            # same extent getsize() reported: the glyph width, and the
            # height including the vertical offset from the drawing origin
            wt, ht = right - left, bottom
        else:
            wt, ht = font.getsize(ch)
        ch_w.append(wt)
        ch_h.append(ht)

    # characters are stacked: the widest one sets the column width and the
    # heights add up
    f_w = max(ch_w)
    f_h = sum(ch_h)

    if f_w > img_w or f_h > img_h:
        raise ValueError(
            'text %r (%sx%s) does not fit into background %s (%sx%s)' %
            (chars, f_w, f_h, image_file, img_w, img_h))

    # random top-left corner that keeps the whole text inside the image
    x1 = random.randint(0, img_w - f_w)
    y1 = random.randint(0, img_h - f_h)
    crop_box = (x1, y1, x1 + f_w, y1 + f_h)

    best_color = (0, 0, 0)
    draw = ImageDraw.Draw(img)
    pen_y = y1
    for ch, ht in zip(chars, ch_h):
        draw.text((x1, pen_y), ch, best_color, font=font)
        pen_y += ht

    crop_img = img.crop(crop_box)
    # turn the vertical strip into a horizontal one
    crop_img = crop_img.transpose(Image.ROTATE_90)
    return crop_img, chars
def get_fonts(fonts_path):
    """
    desc: collect the path of every entry in the fonts folder

    Args:
        fonts_path: directory that holds the font files.

    Returns:
        list of str, ``fonts_path`` joined with each name returned by
        ``os.listdir(fonts_path)``, in the order the listing yields them.
    """
    return [
        os.path.join(fonts_path, entry) for entry in os.listdir(fonts_path)
    ]
if __name__ == '__main__':
    # Script entry point: generates synthetic PCB text pictures together
    # with the recognition (rec) and detection (det) label files.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--num_img', type=int, default=30, help="Number of images to generate")
    parser.add_argument('--font_min_size', type=int, default=11)
    parser.add_argument(
        '--font_max_size',
        type=int,
        default=12,
        help="Help adjust the size of the generated text and the size of the picture"
    )
    parser.add_argument(
        '--bg_path',
        type=str,
        default='./background',
        help='The generated text pictures will be pasted onto the pictures of this folder'
    )
    parser.add_argument(
        '--det_bg_path',
        type=str,
        default='./det_background',
        help='The generated text pictures will use the pictures of this folder as the background'
    )
    parser.add_argument(
        '--fonts_path',
        type=str,
        default='../../StyleText/fonts',
        help='The font used to generate the picture')
    parser.add_argument(
        '--corpus_path',
        type=str,
        default='./corpus',
        help='The corpus used to generate the text picture')
    parser.add_argument(
        '--output_dir', type=str, default='./output/', help='Images save dir')

    cf = parser.parse_args()

    # save path; makedirs also creates missing parent folders and accepts
    # a directory that already exists
    os.makedirs(cf.output_dir, exist_ok=True)

    # get corpus
    char_lines = get_char_lines(txt_root_path=cf.corpus_path)
    if not char_lines:
        raise ValueError('no corpus lines found in ' + cf.corpus_path)

    # get all fonts
    fonts_list = get_fonts(cf.fonts_path)

    # rec bg
    img_root_path = cf.bg_path
    imnames = os.listdir(img_root_path)

    # det bg
    det_bg_path = cf.det_bg_path
    bg_pics = os.listdir(det_bg_path)

    # image folders, relative to output_dir; these prefixes become part of
    # the image paths written to the label files
    det_save_dir = 'imgs/'
    det_val_save_dir = 'imgs_val/'
    rec_save_dir = 'rec_imgs/'
    rec_val_save_dir = 'rec_imgs_val/'
    for sub_dir in (det_save_dir, det_val_save_dir, rec_save_dir,
                    rec_val_save_dir):
        os.makedirs(os.path.join(cf.output_dir, sub_dir), exist_ok=True)

    # images with an index below this count go to the validation set (20%)
    val_count = cf.num_img * 0.2

    print('start generating...')
    # os.path.join keeps the paths valid when --output_dir has no trailing
    # slash; the with statement closes all label files even on errors
    with open(os.path.join(cf.output_dir, 'det_gt_val.txt'), 'w',
              encoding='utf-8') as det_val_file, \
            open(os.path.join(cf.output_dir, 'det_gt_train.txt'), 'w',
                 encoding='utf-8') as det_train_file, \
            open(os.path.join(cf.output_dir, 'rec_gt_val.txt'), 'w',
                 encoding='utf-8') as rec_val_file, \
            open(os.path.join(cf.output_dir, 'rec_gt_train.txt'), 'w',
                 encoding='utf-8') as rec_train_file:
        for i in range(cf.num_img):
            # reuse the corpus cyclically when more images are requested
            # than there are corpus lines
            text = char_lines[i % len(char_lines)]
            is_val = i < val_count

            imname = random.choice(imnames)
            img_path = os.path.join(img_root_path, imname)

            if random.random() < 0.5:
                # gen horizontal text picture
                gen_img, _ = get_horizontal_text_picture(img_path, text,
                                                         fonts_list, cf)
                ori_w, ori_h = gen_img.size
                # drop the top 3 pixel rows of the crop
                gen_img = gen_img.crop((0, 3, ori_w, ori_h))
            else:
                # gen vertical text picture
                gen_img, _ = get_vertical_text_picture(img_path, text,
                                                       fonts_list, cf)
                ori_w, ori_h = gen_img.size
                # drop the left 3 pixel columns of the rotated crop
                gen_img = gen_img.crop((3, 0, ori_w, ori_h))

            ori_w, ori_h = gen_img.size

            # rec imgs
            save_img_name = str(i).zfill(4) + '.jpg'
            if is_val:
                save_dir = os.path.join(rec_val_save_dir, save_img_name)
                rec_val_file.write(save_dir + '\t' + text + '\n')
            else:
                save_dir = os.path.join(rec_save_dir, save_img_name)
                rec_train_file.write(save_dir + '\t' + text + '\n')
            gen_img.save(
                os.path.join(cf.output_dir, save_dir),
                quality=95,
                subsampling=0)

            # det img
            # random choice bg
            bg_pic = random.choice(bg_pics)
            with Image.open(os.path.join(det_bg_path, bg_pic)) as det_img:
                # the PCB position is fixed, modify it according to your own scenario
                if bg_pic == '1.png':
                    x1, y1 = 38, 3
                else:
                    x1, y1 = 34, 1
                det_img.paste(gen_img, (x1, y1))

                # text pos: the four corners of the pasted text picture,
                # starting top-left and going clockwise
                chars_pos = [[x1, y1], [x1 + ori_w, y1],
                             [x1 + ori_w, y1 + ori_h], [x1, y1 + ori_h]]
                label = [{"transcription": text, "points": chars_pos}]
                label_text = json.dumps(label, ensure_ascii=False)
                if is_val:
                    save_dir = os.path.join(det_val_save_dir, save_img_name)
                    det_val_file.write(save_dir + '\t' + label_text + '\n')
                else:
                    save_dir = os.path.join(det_save_dir, save_img_name)
                    det_train_file.write(save_dir + '\t' + label_text + '\n')
                det_img.save(
                    os.path.join(cf.output_dir, save_dir),
                    quality=95,
                    subsampling=0)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录