未验证 提交 6f04c0da 编写于 作者: D Double_V 提交者: GitHub

OCR: use new API (#4290)

* Update OCR models to the new API (fluid.data, fluid.load, fluid.save)
* Fix the code style
上级 5312aaa1
...@@ -24,6 +24,7 @@ sos = 0 ...@@ -24,6 +24,7 @@ sos = 0
eos = 1 eos = 1
beam_size = 1 beam_size = 1
def conv_bn_pool(input, def conv_bn_pool(input,
group, group,
out_ch, out_ch,
...@@ -164,12 +165,13 @@ def gru_decoder_with_attention(target_embedding, encoder_vec, encoder_proj, ...@@ -164,12 +165,13 @@ def gru_decoder_with_attention(target_embedding, encoder_vec, encoder_proj,
def attention_train_net(args, data_shape, num_classes): def attention_train_net(args, data_shape, num_classes):
if len(list(data_shape)) == 3:
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') data_shape = [None] + list(data_shape)
label_in = fluid.layers.data( images = fluid.data(name='pixel', shape=data_shape, dtype='float32')
name='label_in', shape=[1], dtype='int32', lod_level=1) label_in = fluid.data(
label_out = fluid.layers.data( name='label_in', shape=[None, 1], dtype='int32', lod_level=1)
name='label_out', shape=[1], dtype='int32', lod_level=1) label_out = fluid.data(
name='label_out', shape=[None, 1], dtype='int32', lod_level=1)
gru_backward, encoded_vector, encoded_proj = encoder_net(images) gru_backward, encoded_vector, encoded_proj = encoder_net(images)
...@@ -188,7 +190,8 @@ def attention_train_net(args, data_shape, num_classes): ...@@ -188,7 +190,8 @@ def attention_train_net(args, data_shape, num_classes):
prediction = gru_decoder_with_attention(trg_embedding, encoded_vector, prediction = gru_decoder_with_attention(trg_embedding, encoded_vector,
encoded_proj, decoder_boot, encoded_proj, decoder_boot,
decoder_size, num_classes) decoder_size, num_classes)
fluid.clip.set_gradient_clip(fluid.clip.GradientClipByGlobalNorm(args.gradient_clip)) fluid.clip.set_gradient_clip(
fluid.clip.GradientClipByGlobalNorm(args.gradient_clip))
label_out = fluid.layers.cast(x=label_out, dtype='int64') label_out = fluid.layers.cast(x=label_out, dtype='int64')
_, maxid = fluid.layers.topk(input=prediction, k=1) _, maxid = fluid.layers.topk(input=prediction, k=1)
...@@ -264,10 +267,10 @@ def attention_infer(images, num_classes, use_cudnn=True): ...@@ -264,10 +267,10 @@ def attention_infer(images, num_classes, use_cudnn=True):
ids_array = fluid.layers.create_array('int64') ids_array = fluid.layers.create_array('int64')
scores_array = fluid.layers.create_array('float32') scores_array = fluid.layers.create_array('float32')
init_ids = fluid.layers.data( init_ids = fluid.data(
name="init_ids", shape=[1], dtype="int64", lod_level=2) name="init_ids", shape=[None, 1], dtype="int64", lod_level=2)
init_scores = fluid.layers.data( init_scores = fluid.data(
name="init_scores", shape=[1], dtype="float32", lod_level=2) name="init_scores", shape=[None, 1], dtype="float32", lod_level=2)
fluid.layers.array_write(init_ids, array=ids_array, i=counter) fluid.layers.array_write(init_ids, array=ids_array, i=counter)
fluid.layers.array_write(init_scores, array=scores_array, i=counter) fluid.layers.array_write(init_scores, array=scores_array, i=counter)
...@@ -349,11 +352,13 @@ def attention_infer(images, num_classes, use_cudnn=True): ...@@ -349,11 +352,13 @@ def attention_infer(images, num_classes, use_cudnn=True):
def attention_eval(data_shape, num_classes, use_cudnn=True): def attention_eval(data_shape, num_classes, use_cudnn=True):
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') if len(list(data_shape)) == 3:
label_in = fluid.layers.data( data_shape = [None] + data_shape
name='label_in', shape=[1], dtype='int32', lod_level=1) images = fluid.data(name='pixel', shape=data_shape, dtype='float32')
label_out = fluid.layers.data( label_in = fluid.data(
name='label_out', shape=[1], dtype='int32', lod_level=1) name='label_in', shape=[None, 1], dtype='int32', lod_level=1)
label_out = fluid.data(
name='label_out', shape=[None, 1], dtype='int32', lod_level=1)
label_out = fluid.layers.cast(x=label_out, dtype='int64') label_out = fluid.layers.cast(x=label_out, dtype='int64')
label_in = fluid.layers.cast(x=label_in, dtype='int64') label_in = fluid.layers.cast(x=label_in, dtype='int64')
......
...@@ -188,10 +188,11 @@ def ctc_train_net(args, data_shape, num_classes): ...@@ -188,10 +188,11 @@ def ctc_train_net(args, data_shape, num_classes):
MOMENTUM = args.momentum MOMENTUM = args.momentum
learning_rate_decay = None learning_rate_decay = None
regularizer = fluid.regularizer.L2Decay(L2_RATE) regularizer = fluid.regularizer.L2Decay(L2_RATE)
if len(list(data_shape)) == 3:
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') data_shape = [None] + list(data_shape)
label = fluid.layers.data( images = fluid.data(name='pixel', shape=data_shape, dtype='float32')
name='label', shape=[1], dtype='int32', lod_level=1) label = fluid.data(
name='label', shape=[None, 1], dtype='int32', lod_level=1)
fc_out = encoder_net( fc_out = encoder_net(
images, images,
num_classes, num_classes,
...@@ -231,9 +232,11 @@ def ctc_infer(images, num_classes, use_cudnn=True): ...@@ -231,9 +232,11 @@ def ctc_infer(images, num_classes, use_cudnn=True):
def ctc_eval(data_shape, num_classes, use_cudnn=True): def ctc_eval(data_shape, num_classes, use_cudnn=True):
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') if len(list(data_shape)) == 3:
label = fluid.layers.data( data_shape = [None] + list(data_shape)
name='label', shape=[1], dtype='int32', lod_level=1) images = fluid.data(name='pixel', shape=data_shape, dtype='float32')
label = fluid.data(
name='label', shape=[None, 1], dtype='int32', lod_level=1)
fc_out = encoder_net(images, num_classes, is_test=True, use_cudnn=use_cudnn) fc_out = encoder_net(images, num_classes, is_test=True, use_cudnn=use_cudnn)
decoded_out = fluid.layers.ctc_greedy_decoder( decoded_out = fluid.layers.ctc_greedy_decoder(
input=fc_out, blank=num_classes) input=fc_out, blank=num_classes)
......
...@@ -32,7 +32,8 @@ except NameError: ...@@ -32,7 +32,8 @@ except NameError:
SOS = 0 SOS = 0
EOS = 1 EOS = 1
NUM_CLASSES = 95 NUM_CLASSES = 95
DATA_SHAPE = [1, 48, 512] IMG_WIDTH = 384
DATA_SHAPE = [1, 48, IMG_WIDTH]
DATA_MD5 = "7256b1d5420d8c3e74815196e58cdad5" DATA_MD5 = "7256b1d5420d8c3e74815196e58cdad5"
DATA_URL = "http://paddle-ocr-data.bj.bcebos.com/data.tar.gz" DATA_URL = "http://paddle-ocr-data.bj.bcebos.com/data.tar.gz"
......
...@@ -64,11 +64,11 @@ def evaluate(args): ...@@ -64,11 +64,11 @@ def evaluate(args):
# load init model # load init model
model_dir = args.model_path model_dir = args.model_path
model_file_name = None if os.path.isdir(args.model_path):
if not os.path.isdir(args.model_path): raise Exception("{} should not be a directory".format(args.model_path))
model_dir = os.path.dirname(args.model_path) fluid.load(program=fluid.default_main_program(),
model_file_name = os.path.basename(args.model_path) model_path=model_dir,
fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name) executor=exe)
print("Init model from: %s." % args.model_path) print("Init model from: %s." % args.model_path)
evaluator.reset(exe) evaluator.reset(exe)
......
...@@ -54,7 +54,9 @@ def inference(args): ...@@ -54,7 +54,9 @@ def inference(args):
num_classes = data_reader.num_classes() num_classes = data_reader.num_classes()
data_shape = data_reader.data_shape() data_shape = data_reader.data_shape()
# define network # define network
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') if len(list(data_shape)) == 3:
data_shape = [None] + list(data_shape)
images = fluid.data(name='pixel', shape=data_shape, dtype='float32')
ids = infer(images, num_classes, use_cudnn=True if args.use_gpu else False) ids = infer(images, num_classes, use_cudnn=True if args.use_gpu else False)
# data reader # data reader
infer_reader = data_reader.inference( infer_reader = data_reader.inference(
...@@ -82,11 +84,10 @@ def inference(args): ...@@ -82,11 +84,10 @@ def inference(args):
# load init model # load init model
model_dir = args.model_path model_dir = args.model_path
model_file_name = None fluid.load(
if not os.path.isdir(args.model_path): program=fluid.default_main_program(),
model_dir = os.path.dirname(args.model_path) model_path=model_dir,
model_file_name = os.path.basename(args.model_path) executor=exe)
fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
print("Init model from: %s." % args.model_path) print("Init model from: %s." % args.model_path)
batch_times = [] batch_times = []
......
...@@ -106,11 +106,7 @@ def train(args): ...@@ -106,11 +106,7 @@ def train(args):
# load init model # load init model
if args.init_model is not None: if args.init_model is not None:
model_dir = args.init_model model_dir = args.init_model
model_file_name = None fluid.load(fluid.default_main_program(), model_dir)
if not os.path.isdir(args.init_model):
model_dir = os.path.dirname(args.init_model)
model_file_name = os.path.basename(args.init_model)
fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
print("Init model from: %s." % args.init_model) print("Init model from: %s." % args.init_model)
train_exe = exe train_exe = exe
...@@ -148,8 +144,8 @@ def train(args): ...@@ -148,8 +144,8 @@ def train(args):
def save_model(args, exe, iter_num): def save_model(args, exe, iter_num):
filename = "model_%05d" % iter_num filename = "model_%05d" % iter_num
fluid.io.save_params( fluid.save(fluid.default_main_program(),
exe, dirname=args.save_model_dir, filename=filename) os.path.join(args.save_model_dir, filename))
print("Saved model to: %s/%s." % (args.save_model_dir, filename)) print("Saved model to: %s/%s." % (args.save_model_dir, filename))
iter_num = 0 iter_num = 0
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册