Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
models
提交
bf281041
M
models
项目概览
PaddlePaddle
/
models
大约 1 年 前同步成功
通知
222
Star
6828
Fork
2962
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
602
列表
看板
标记
里程碑
合并请求
255
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
models
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
602
Issue
602
列表
看板
标记
里程碑
合并请求
255
合并请求
255
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
bf281041
编写于
2月 08, 2018
作者:
Z
zhxfl
浏览文件
操作
浏览文件
下载
差异文件
fix by review
上级
93fecc89
b62b05fc
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
160 addition
and
31 deletion
+160
-31
fluid/DeepASR/data_utils/data_reader.py
fluid/DeepASR/data_utils/data_reader.py
+40
-27
fluid/DeepASR/data_utils/util.py
fluid/DeepASR/data_utils/util.py
+23
-4
fluid/ocr_recognition/ctc_reader.py
fluid/ocr_recognition/ctc_reader.py
+97
-0
未找到文件。
fluid/DeepASR/data_utils/data_reader.py
浏览文件 @
bf281041
...
@@ -15,6 +15,7 @@ from multiprocessing import Manager, Process
...
@@ -15,6 +15,7 @@ from multiprocessing import Manager, Process
import
data_utils.augmentor.trans_mean_variance_norm
as
trans_mean_variance_norm
import
data_utils.augmentor.trans_mean_variance_norm
as
trans_mean_variance_norm
import
data_utils.augmentor.trans_add_delta
as
trans_add_delta
import
data_utils.augmentor.trans_add_delta
as
trans_add_delta
from
data_utils.util
import
suppress_complaints
,
suppress_signal
from
data_utils.util
import
suppress_complaints
,
suppress_signal
from
data_utils.util
import
CriticalException
,
ForceExitWrapper
class
SampleInfo
(
object
):
class
SampleInfo
(
object
):
...
@@ -89,6 +90,7 @@ class SampleInfoBucket(object):
...
@@ -89,6 +90,7 @@ class SampleInfoBucket(object):
self
.
_split_perturb
=
split_perturb
self
.
_split_perturb
=
split_perturb
self
.
_split_sentence_threshold
=
split_sentence_threshold
self
.
_split_sentence_threshold
=
split_sentence_threshold
self
.
_split_sub_sentence_len
=
split_sub_sentence_len
self
.
_split_sub_sentence_len
=
split_sub_sentence_len
self
.
_rng
=
random
.
Random
(
0
)
def
generate_sample_info_list
(
self
):
def
generate_sample_info_list
(
self
):
sample_info_list
=
[]
sample_info_list
=
[]
...
@@ -213,6 +215,7 @@ class DataReader(object):
...
@@ -213,6 +215,7 @@ class DataReader(object):
self
.
_batch_buffer_size
=
batch_buffer_size
self
.
_batch_buffer_size
=
batch_buffer_size
self
.
_process_num
=
process_num
self
.
_process_num
=
process_num
self
.
_verbose
=
verbose
self
.
_verbose
=
verbose
self
.
_force_exit
=
ForceExitWrapper
(
self
.
_manager
.
Value
(
'b'
,
False
))
def
generate_bucket_list
(
self
,
is_shuffle
):
def
generate_bucket_list
(
self
,
is_shuffle
):
if
self
.
_block_info_list
is
None
:
if
self
.
_block_info_list
is
None
:
...
@@ -251,11 +254,15 @@ class DataReader(object):
...
@@ -251,11 +254,15 @@ class DataReader(object):
sample_queue
=
self
.
_manager
.
Queue
(
self
.
_sample_buffer_size
)
sample_queue
=
self
.
_manager
.
Queue
(
self
.
_sample_buffer_size
)
self
.
_order_id
=
0
self
.
_order_id
=
0
@
suppress_complaints
(
verbose
=
self
.
_verbose
)
@
suppress_complaints
(
verbose
=
self
.
_verbose
,
notify
=
self
.
_force_exit
)
def
ordered_feeding_task
(
sample_info_queue
):
def
ordered_feeding_task
(
sample_info_queue
):
for
sample_info_bucket
in
self
.
_bucket_list
:
for
sample_info_bucket
in
self
.
_bucket_list
:
sample_info_list
=
sample_info_bucket
.
generate_sample_info_list
(
try
:
)
sample_info_list
=
\
sample_info_bucket
.
generate_sample_info_list
()
except
Exception
as
e
:
raise
CriticalException
(
e
)
else
:
self
.
_rng
.
shuffle
(
sample_info_list
)
# do shuffle here
self
.
_rng
.
shuffle
(
sample_info_list
)
# do shuffle here
for
sample_info
in
sample_info_list
:
for
sample_info
in
sample_info_list
:
sample_info_queue
.
put
((
sample_info
,
self
.
_order_id
))
sample_info_queue
.
put
((
sample_info
,
self
.
_order_id
))
...
@@ -269,18 +276,21 @@ class DataReader(object):
...
@@ -269,18 +276,21 @@ class DataReader(object):
feeding_thread
.
daemon
=
True
feeding_thread
.
daemon
=
True
feeding_thread
.
start
()
feeding_thread
.
start
()
@
suppress_complaints
(
verbose
=
self
.
_verbose
)
@
suppress_complaints
(
verbose
=
self
.
_verbose
,
notify
=
self
.
_force_exit
)
def
ordered_processing_task
(
sample_info_queue
,
sample_queue
,
out_order
):
def
ordered_processing_task
(
sample_info_queue
,
sample_queue
,
out_order
):
if
self
.
_verbose
==
0
:
if
self
.
_verbose
==
0
:
signal
.
signal
(
signal
.
SIGTERM
,
suppress_signal
)
signal
.
signal
(
signal
.
SIGTERM
,
suppress_signal
)
signal
.
signal
(
signal
.
SIGINT
,
suppress_signal
)
signal
.
signal
(
signal
.
SIGINT
,
suppress_signal
)
def
read_bytes
(
fpath
,
start
,
size
):
def
read_bytes
(
fpath
,
start
,
size
):
try
:
f
=
open
(
fpath
,
'r'
)
f
=
open
(
fpath
,
'r'
)
f
.
seek
(
start
,
0
)
f
.
seek
(
start
,
0
)
binary_bytes
=
f
.
read
(
size
)
binary_bytes
=
f
.
read
(
size
)
f
.
close
()
f
.
close
()
return
binary_bytes
return
binary_bytes
except
Exception
as
e
:
raise
CriticalException
(
e
)
ins
=
sample_info_queue
.
get
()
ins
=
sample_info_queue
.
get
()
...
@@ -352,16 +362,21 @@ class DataReader(object):
...
@@ -352,16 +362,21 @@ class DataReader(object):
w
.
start
()
w
.
start
()
finished_process_num
=
0
finished_process_num
=
0
while
finished_process_num
<
self
.
_process_num
:
sample
=
sample_queue
.
get
()
while
self
.
_force_exit
==
False
:
try
:
sample
=
sample_queue
.
get_nowait
()
except
Queue
.
Empty
:
time
.
sleep
(
0.001
)
else
:
if
isinstance
(
sample
,
EpochEndSignal
):
if
isinstance
(
sample
,
EpochEndSignal
):
finished_process_num
+=
1
finished_process_num
+=
1
if
finished_process_num
>=
self
.
_process_num
:
break
else
:
continue
continue
yield
sample
feeding_thread
.
join
()
yield
sample
for
w
in
workers
:
w
.
join
()
def
batch_iterator
(
self
,
batch_size
,
minimum_batch_size
):
def
batch_iterator
(
self
,
batch_size
,
minimum_batch_size
):
def
batch_to_ndarray
(
batch_samples
,
lod
):
def
batch_to_ndarray
(
batch_samples
,
lod
):
...
@@ -377,7 +392,7 @@ class DataReader(object):
...
@@ -377,7 +392,7 @@ class DataReader(object):
start
+=
frame_num
start
+=
frame_num
return
(
batch_feature
,
batch_label
)
return
(
batch_feature
,
batch_label
)
@
suppress_complaints
(
verbose
=
self
.
_verbose
)
@
suppress_complaints
(
verbose
=
self
.
_verbose
,
notify
=
self
.
_force_exit
)
def
batch_assembling_task
(
sample_generator
,
batch_queue
):
def
batch_assembling_task
(
sample_generator
,
batch_queue
):
batch_samples
=
[]
batch_samples
=
[]
lod
=
[
0
]
lod
=
[
0
]
...
@@ -406,7 +421,7 @@ class DataReader(object):
...
@@ -406,7 +421,7 @@ class DataReader(object):
assembling_thread
.
daemon
=
True
assembling_thread
.
daemon
=
True
assembling_thread
.
start
()
assembling_thread
.
start
()
while
Tru
e
:
while
self
.
_force_exit
==
Fals
e
:
try
:
try
:
batch_data
=
batch_queue
.
get_nowait
()
batch_data
=
batch_queue
.
get_nowait
()
except
Queue
.
Empty
:
except
Queue
.
Empty
:
...
@@ -415,5 +430,3 @@ class DataReader(object):
...
@@ -415,5 +430,3 @@ class DataReader(object):
if
isinstance
(
batch_data
,
EpochEndSignal
):
if
isinstance
(
batch_data
,
EpochEndSignal
):
break
break
yield
batch_data
yield
batch_data
assembling_thread
.
join
()
fluid/DeepASR/data_utils/util.py
浏览文件 @
bf281041
...
@@ -35,21 +35,40 @@ def lodtensor_to_ndarray(lod_tensor):
...
@@ -35,21 +35,40 @@ def lodtensor_to_ndarray(lod_tensor):
return
ret
,
lod_tensor
.
lod
()
return
ret
,
lod_tensor
.
lod
()
class CriticalException(Exception):
    """Exception type used to mark an error that must not be suppressed.

    It adds nothing to ``Exception``; it exists so that handlers can pick
    out these errors with an ``isinstance`` check and re-raise them.
    """
def
suppress_signal
(
signo
,
stack_frame
):
def
suppress_signal
(
signo
,
stack_frame
):
pass
pass
def
suppress_complaints
(
verbose
):
def
suppress_complaints
(
verbose
,
notify
=
None
):
def
decorator_maker
(
func
):
def
decorator_maker
(
func
):
def
suppress_warpper
(
*
args
,
**
kwargs
):
def
suppress_warpper
(
*
args
,
**
kwargs
):
try
:
try
:
func
(
*
args
,
**
kwargs
)
func
(
*
args
,
**
kwargs
)
except
:
except
:
et
,
ev
,
tb
=
sys
.
exc_info
()
et
,
ev
,
tb
=
sys
.
exc_info
()
tb
=
Traceback
(
tb
)
if
verbose
==
1
:
if
notify
is
not
None
:
reraise
(
et
,
ev
,
tb
.
as_traceback
())
notify
(
except_type
=
et
,
except_value
=
ev
,
traceback
=
tb
)
if
verbose
==
1
or
isinstance
(
ev
,
CriticalException
):
reraise
(
et
,
ev
,
Traceback
(
tb
).
as_traceback
())
return
suppress_warpper
return
suppress_warpper
return
decorator_maker
return
decorator_maker
class ForceExitWrapper(object):
    """Callable wrapper around a shared "exit requested" flag.

    Calling the wrapper sets the flag; comparing the wrapper against a
    boolean compares the flag's current value, so loops can be written as
    ``while wrapper == False:``.

    :param exit_flag: Object exposing a mutable ``value`` attribute that
        holds the boolean flag.
    """

    def __init__(self, exit_flag):
        self._exit_flag = exit_flag

    # Setting the flag is best effort: with verbose=0 the decorator does not
    # re-raise ordinary exceptions raised while setting it.
    @suppress_complaints(verbose=0)
    def __call__(self, *args, **kwargs):
        """Set the flag; all arguments are accepted and ignored."""
        self._exit_flag.value = True

    def __eq__(self, flag):
        """Return whether the wrapped flag's value equals ``flag``."""
        return self._exit_flag.value == flag

    def __ne__(self, flag):
        """Return whether the wrapped flag's value differs from ``flag``.

        Python 2 does not derive ``!=`` from ``__eq__``, so without this
        method ``wrapper != False`` would not look at the flag at all.
        """
        return not self.__eq__(flag)
fluid/ocr_recognition/ctc_reader.py
0 → 100644
浏览文件 @
bf281041
import
os
import
cv2
import
numpy
as
np
from
PIL
import
Image
from
paddle.v2.image
import
load_image
class DataGenerator(object):
    """Create sample readers for CTC OCR training and inference.

    Every line of a label-list file is split on single spaces. The third
    field is used as the image file name (joined onto ``img_root_dir``) and
    the last field as a comma separated list of integer labels.
    """

    def __init__(self):
        pass

    @staticmethod
    def _parse_line(line):
        """Return ``(image_name, labels)`` parsed from one label-list line."""
        # h, w, img_name, labels
        items = line.split(' ')
        # int() tolerates the trailing newline left on the last field.
        labels = [int(c) for c in items[-1].split(',')]
        return items[2], labels

    @staticmethod
    def _open_gray(img_root_dir, img_name):
        """Open ``img_name`` under ``img_root_dir`` as a grayscale image."""
        return Image.open(os.path.join(img_root_dir, img_name)).convert('L')

    @staticmethod
    def _to_array(img):
        """Convert an image to an array shifted by -127.5 with a new leading axis."""
        return (np.array(img) - 127.5)[np.newaxis, ...]

    def train_reader(self, img_root_dir, img_label_list, batchsize):
        '''
        Reader interface for training.

        The label list is shuffled by an external shell pipeline
        (``cat``/``awk``/``sort``/``sed``/``shuf``) whose output is written
        to ``tmp.txt`` in the current working directory and then read back.

        :param img_root_dir: The root path of the image for training.
        :type img_root_dir: str

        :param img_label_list: The path of the <image_name, label> file for training.
        :type img_label_list: str

        :param batchsize: Number of samples in every yielded batch.
        :type batchsize: int

        :return: A generator function; each item it yields is a list of
            ``batchsize`` ``[image_array, label]`` pairs. Lines left over
            after the last full batch are dropped.
        '''
        # Function-scope import keeps the file's import block untouched and
        # works on both Python 3 (shlex) and Python 2 (pipes).
        try:
            from shlex import quote
        except ImportError:
            from pipes import quote

        to_file = "tmp.txt"
        # Quote the path so spaces or shell metacharacters in it cannot break
        # or alter the command; plain paths are passed through unchanged.
        list_path = quote(img_label_list)

        if batchsize == 1:
            cmd = "cat " + list_path + " | awk '{print $1,$2,$3,$4;}' | shuf > " + to_file
        else:
            batch_str = str(batchsize)
            #cmd1: partial shuffle
            cmd = "cat " + list_path + " | awk '{printf(\"%04d%.4f %s\\n\", $1, rand(), $0)}' | sort | sed 1,$((1 + RANDOM % 100))d | "
            #cmd2: batch merge and shuffle
            cmd += "awk '{printf $2\" \"$3\" \"$4\" \"$5\" \"; if(NR % " + batch_str + " == 0) print \"\";}' | shuf | "
            #cmd3: batch split
            cmd += "awk '{if(NF == " + batch_str + " * 4) {for(i = 0; i < " + batch_str + "; i++) print $(4*i+1)\" \"$(4*i+2)\" \"$(4*i+3)\" \"$(4*i+4);}}' > " + to_file

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("cmd: " + cmd)
        os.system(cmd)
        print("finish batch shuffle")

        # Read the shuffled list back and close the handle right away.
        with open(to_file, 'r') as shuffled_file:
            img_label_lines = shuffled_file.readlines()

        def reader():
            # Floor division keeps the batch count an int on Python 3 as well.
            sizes = len(img_label_lines) // batchsize
            for i in range(sizes):
                result = []
                sz = None
                for j in range(batchsize):
                    line = img_label_lines[i * batchsize + j]
                    img_name, label = self._parse_line(line)
                    # convert to grayscale
                    img = self._open_gray(img_root_dir, img_name)
                    # Every image in a batch is resized to the size of the
                    # batch's first image.
                    if j == 0:
                        sz = img.size
                    img = img.resize((sz[0], sz[1]))
                    result.append([self._to_array(img), label])
                yield result

        return reader

    def test_reader(self, img_root_dir, img_label_list):
        '''
        Reader interface for inference.

        :param img_root_dir: The root path of the images for testing.
        :type img_root_dir: str

        :param img_label_list: The path of the <image_name, label> file for testing.
        :type img_label_list: str

        :return: A generator function yielding ``(image_array, label)`` per
            non-blank line of ``img_label_list``, in file order.
        '''

        def reader():
            # The with-block closes the list file once iteration finishes or
            # the generator is closed.
            with open(img_label_list) as label_file:
                for line in label_file:
                    # Blank lines (e.g. a trailing newline) carry no sample.
                    if not line.strip():
                        continue
                    img_name, label = self._parse_line(line)
                    img = self._open_gray(img_root_dir, img_name)
                    yield self._to_array(img), label

        return reader
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录