Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
繁依Fanyi
CnOCR
提交
2b8e12dd
CnOCR
项目概览
繁依Fanyi
/
CnOCR
与 Fork 源项目一致
从无法访问的项目Fork
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
CnOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
2b8e12dd
编写于
4月 21, 2020
作者:
B
breezedeus
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
optimize model directory names
上级
d661eb19
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
41 addition
and
38 deletion
+41
-38
Makefile
Makefile
+14
-4
cnocr/cn_ocr.py
cnocr/cn_ocr.py
+1
-5
cnocr/consts.py
cnocr/consts.py
+4
-4
cnocr/utils.py
cnocr/utils.py
+1
-1
scripts/cnocr_train.py
scripts/cnocr_train.py
+19
-24
scripts/im2rec.py
scripts/im2rec.py
+2
-0
未找到文件。
Makefile
浏览文件 @
2b8e12dd
DATA_ROOT_DIR
=
data/sample-data
REC_DATA_ROOT_DIR
=
data/sample-data-lst
# ['conv', 'conv-lite-rnn', 'densenet', 'densenet-lite']
EMB_MODEL_TYPE
=
densenet-lite
SEQ_MODEL_TYPE
=
fc
MODEL_NAME
=
$(EMB_MODEL_TYPE)
-
$(SEQ_MODEL_TYPE)
# 产生 *.lst 文件
gen-lst
:
python scripts/im2rec.py
--list
--num-label
20
--chunks
1
--train-idx-fp
data/selected/train.txt
--test-idx-fp
data/selected/test.txt
--prefix
data/selected-lst/selected-data
python scripts/im2rec.py
--list
--num-label
20
--chunks
1
\
--train-idx-fp
$(DATA_ROOT_DIR)
/train.txt
--test-idx-fp
$(DATA_ROOT_DIR)
/test.txt
--prefix
$(REC_DATA_ROOT_DIR)
/sample-data
# 利用 *.lst 文件产生 *.idx 和 *.rec 文件。
# 真正的图片文件存储在 `examples` 目录,可通过 `--root` 指定。
gen-rec
:
python scripts/im2rec.py
--pack-label
--color
1
--num-thread
1
--prefix
data/selected-lst
--root
data/selected
python scripts/im2rec.py
--pack-label
--color
1
--num-thread
1
--prefix
$(REC_DATA_ROOT_DIR)
--root
examples
# 训练模型
train
:
python scripts/cnocr_train.py
--gpu
0
--emb_model_type
$(EMB_MODEL_TYPE)
--seq_model_type
$(SEQ_MODEL_TYPE)
--optimizer
adam
--epoch
50
--lr
1e-5
--train_file
data/selected-lst/selected-data_train
--test_file
data/selected-lst/selected-data_test
python scripts/cnocr_train.py
--gpu
0
--emb_model_type
$(EMB_MODEL_TYPE)
--seq_model_type
$(SEQ_MODEL_TYPE)
\
--optimizer
adam
--epoch
20
--lr
1e-4
\
--train_file
$(REC_DATA_ROOT_DIR)
/sample-data_train
--test_file
$(REC_DATA_ROOT_DIR)
/sample-data_test
evaluate
:
python scripts/cnocr_evaluate.py
--model-name
$(MODEL_NAME)
--model-epoch
2
-v
-i
data/selected/test.txt
--image-prefix-dir
data/selected
--batch-size
128
-o
evaluate/
$(MODEL_NAME)
python scripts/cnocr_evaluate.py
--model-name
$(MODEL_NAME)
--model-epoch
1
-v
-i
$(DATA_ROOT_DIR)
/test.txt
\
--image-prefix-dir
examples
--batch-size
128
-o
evaluate/
$(MODEL_NAME)
predict
:
python scripts/cnocr_predict.py
--model_name
$(MODEL_NAME)
--file
examples/rand_cn1.png
...
...
cnocr/cn_ocr.py
浏览文件 @
2b8e12dd
...
...
@@ -126,9 +126,7 @@ class CnOcr(object):
self
.
_model_epoch
=
model_epoch
or
AVAILABLE_MODELS
[
model_name
][
0
]
root
=
os
.
path
.
join
(
root
,
__version__
)
self
.
_model_dir
=
os
.
path
.
join
(
root
,
'%s-%04d'
%
(
self
.
_model_name
,
self
.
_model_epoch
)
)
self
.
_model_dir
=
os
.
path
.
join
(
root
,
self
.
_model_name
)
self
.
_assert_and_prepare_model_files
()
self
.
_alphabet
,
inv_alph_dict
=
read_charset
(
os
.
path
.
join
(
self
.
_model_dir
,
'label_cn.txt'
)
...
...
@@ -161,8 +159,6 @@ class CnOcr(object):
if
file_prepared
:
return
if
os
.
path
.
exists
(
model_dir
):
os
.
removedirs
(
model_dir
)
get_model_file
(
model_dir
)
def
_get_module
(
self
):
...
...
cnocr/consts.py
浏览文件 @
2b8e12dd
...
...
@@ -13,10 +13,10 @@ root_url = (
)
# name: (epochs, url)
AVAILABLE_MODELS
=
{
'conv-lstm'
:
(
50
,
root_url
+
'/conv-lstm
-0050
.zip'
),
'conv-lite-lstm'
:
(
45
,
root_url
+
'/conv-lite-lstm
-0045
.zip'
),
'conv-lite-fc'
:
(
27
,
root_url
+
'/conv-lite-fc
-0027
.zip'
),
'densenet-lite-lstm'
:
(
42
,
root_url
+
'/densenet-lite-lstm
-0042
.zip'
),
'conv-lstm'
:
(
50
,
root_url
+
'/conv-lstm.zip'
),
'conv-lite-lstm'
:
(
45
,
root_url
+
'/conv-lite-lstm.zip'
),
'conv-lite-fc'
:
(
27
,
root_url
+
'/conv-lite-fc.zip'
),
'densenet-lite-lstm'
:
(
42
,
root_url
+
'/densenet-lite-lstm.zip'
),
'densenet-lite-fc'
:
(
32
,
root_url
+
'/densenet-lite-fc.zip'
),
}
...
...
cnocr/utils.py
浏览文件 @
2b8e12dd
...
...
@@ -71,7 +71,7 @@ def get_model_file(model_dir):
zip_file_path
=
model_dir
+
'.zip'
if
not
os
.
path
.
exists
(
zip_file_path
):
model_name
=
os
.
path
.
basename
(
model_dir
)
.
rsplit
(
'-'
,
maxsplit
=
1
)[
0
]
model_name
=
os
.
path
.
basename
(
model_dir
)
if
model_name
not
in
AVAILABLE_MODELS
:
raise
NotImplementedError
(
'%s is not an available downloaded model'
%
model_name
)
url
=
AVAILABLE_MODELS
[
model_name
][
1
]
...
...
scripts/cnocr_train.py
浏览文件 @
2b8e12dd
...
...
@@ -39,41 +39,32 @@ from cnocr.fit.fit import fit
def
parse_args
():
# Parse command line arguments
parser
=
argparse
.
ArgumentParser
()
default_model_prefix
=
os
.
path
.
join
(
data_dir
(),
'models'
,
'cnocr-v{}'
.
format
(
__version__
)
)
parser
.
add_argument
(
"--emb_model_type"
,
help
=
"which embedding model to use"
,
choices
=
EMB_MODEL_TYPES
,
type
=
str
,
default
=
'conv-
rnn
'
,
default
=
'conv-
lite
'
,
)
parser
.
add_argument
(
"--seq_model_type"
,
help
=
'which sequence model to use'
,
default
=
'
lstm
'
,
default
=
'
fc
'
,
type
=
str
,
choices
=
SEQ_MODEL_TYPES
,
)
parser
.
add_argument
(
"--data_root"
,
help
=
"Path to image files"
,
type
=
str
,
default
=
'/Users/king/Documents/WhatIHaveDone/Test/text_renderer/output/wechat_simulator'
,
)
parser
.
add_argument
(
"--train_file"
,
help
=
"Path to train txt file"
,
type
=
str
,
default
=
'
/Users/king/Documents/WhatIHaveDone/Test/text_renderer/output/wechat_simulator
/train.txt'
,
default
=
'
data/sample-data-lst
/train.txt'
,
)
parser
.
add_argument
(
"--test_file"
,
help
=
"Path to test txt file"
,
type
=
str
,
default
=
'
/Users/king/Documents/WhatIHaveDone/Test/text_renderer/output/wechat_simulator
/test.txt'
,
default
=
'
data/sample-data-lst
/test.txt'
,
)
parser
.
add_argument
(
"--use_train_image_aug"
,
...
...
@@ -81,7 +72,10 @@ def parse_args():
help
=
"Whether to use image augmentation for training"
,
)
parser
.
add_argument
(
"--gpu"
,
help
=
"Number of GPUs for training [Default 0, means using cpu]"
,
type
=
int
,
default
=
0
"--gpu"
,
help
=
"Number of GPUs for training [Default 0, means using cpu]"
,
type
=
int
,
default
=
0
,
)
parser
.
add_argument
(
"--optimizer"
,
...
...
@@ -97,12 +91,7 @@ def parse_args():
type
=
int
,
help
=
'load the model on an epoch using the model-load-prefix [Default: no trained model will be loaded]'
,
)
parser
.
add_argument
(
'--lr'
,
type
=
float
,
default
=
0.001
,
help
=
'learning rate'
,
)
parser
.
add_argument
(
'--lr'
,
type
=
float
,
default
=
0.001
,
help
=
'learning rate'
)
parser
.
add_argument
(
'--wd'
,
type
=
float
,
default
=
0.0
,
help
=
'weight decay factor [Default: 0.0]'
)
...
...
@@ -113,9 +102,9 @@ def parse_args():
help
=
'value for clip gradient [Default: None, means no gradient will be clip]'
,
)
parser
.
add_argument
(
"--
prefix
"
,
help
=
"Checkpoint prefix [Default '{}']"
.
format
(
default_model_prefix
)
,
default
=
default_model_prefix
,
"--
out_model_dir
"
,
help
=
'output model directory'
,
default
=
os
.
path
.
join
(
data_dir
(),
__version__
)
,
)
return
parser
.
parse_args
()
...
...
@@ -124,7 +113,13 @@ def train_cnocr(args):
head
=
'%(asctime)-15s %(message)s'
logging
.
basicConfig
(
level
=
logging
.
DEBUG
,
format
=
head
)
args
.
model_name
=
args
.
emb_model_type
+
'-'
+
args
.
seq_model_type
args
.
prefix
=
'{}-{}'
.
format
(
args
.
prefix
,
args
.
model_name
)
out_dir
=
os
.
path
.
join
(
args
.
out_model_dir
,
args
.
model_name
)
print
(
'save models to dir: %s'
%
out_dir
,
flush
=
True
)
if
not
os
.
path
.
exists
(
out_dir
):
os
.
makedirs
(
out_dir
)
args
.
prefix
=
os
.
path
.
join
(
out_dir
,
'cnocr-v{}-{}'
.
format
(
__version__
,
args
.
model_name
)
)
hp
=
CnHyperparams
()
hp
=
_update_hp
(
hp
,
args
)
...
...
scripts/im2rec.py
浏览文件 @
2b8e12dd
...
...
@@ -152,6 +152,8 @@ def make_list_new(args):
prefix
=
''
else
:
working_dir
=
os
.
path
.
dirname
(
args
.
prefix
)
if
not
os
.
path
.
exists
(
working_dir
):
os
.
makedirs
(
working_dir
)
prefix
=
os
.
path
.
basename
(
args
.
prefix
)
test_list
=
read_file
(
args
.
test_idx_fp
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录