From 6f04c0da75f4caa9069bbf2f775fd95e97a20d92 Mon Sep 17 00:00:00 2001 From: Double_V Date: Mon, 17 Feb 2020 19:52:01 +0800 Subject: [PATCH] Ocr use new API (#4290) * update new API for ocr * fix the code style --- PaddleCV/ocr_recognition/attention_model.py | 37 ++++++++++++--------- PaddleCV/ocr_recognition/crnn_ctc_model.py | 17 ++++++---- PaddleCV/ocr_recognition/data_reader.py | 3 +- PaddleCV/ocr_recognition/eval.py | 10 +++--- PaddleCV/ocr_recognition/infer.py | 13 ++++---- PaddleCV/ocr_recognition/train.py | 10 ++---- 6 files changed, 48 insertions(+), 42 deletions(-) diff --git a/PaddleCV/ocr_recognition/attention_model.py b/PaddleCV/ocr_recognition/attention_model.py index 4a2dad27..5ea8886f 100755 --- a/PaddleCV/ocr_recognition/attention_model.py +++ b/PaddleCV/ocr_recognition/attention_model.py @@ -24,6 +24,7 @@ sos = 0 eos = 1 beam_size = 1 + def conv_bn_pool(input, group, out_ch, @@ -164,12 +165,13 @@ def gru_decoder_with_attention(target_embedding, encoder_vec, encoder_proj, def attention_train_net(args, data_shape, num_classes): - - images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') - label_in = fluid.layers.data( - name='label_in', shape=[1], dtype='int32', lod_level=1) - label_out = fluid.layers.data( - name='label_out', shape=[1], dtype='int32', lod_level=1) + if len(list(data_shape)) == 3: + data_shape = [None] + list(data_shape) + images = fluid.data(name='pixel', shape=data_shape, dtype='float32') + label_in = fluid.data( + name='label_in', shape=[None, 1], dtype='int32', lod_level=1) + label_out = fluid.data( + name='label_out', shape=[None, 1], dtype='int32', lod_level=1) gru_backward, encoded_vector, encoded_proj = encoder_net(images) @@ -188,7 +190,8 @@ def attention_train_net(args, data_shape, num_classes): prediction = gru_decoder_with_attention(trg_embedding, encoded_vector, encoded_proj, decoder_boot, decoder_size, num_classes) - fluid.clip.set_gradient_clip(fluid.clip.GradientClipByGlobalNorm(args.gradient_clip)) + fluid.clip.set_gradient_clip( + fluid.clip.GradientClipByGlobalNorm(args.gradient_clip)) label_out = fluid.layers.cast(x=label_out, dtype='int64') _, maxid = fluid.layers.topk(input=prediction, k=1) @@ -264,10 +267,10 @@ def attention_infer(images, num_classes, use_cudnn=True): ids_array = fluid.layers.create_array('int64') scores_array = fluid.layers.create_array('float32') - init_ids = fluid.layers.data( - name="init_ids", shape=[1], dtype="int64", lod_level=2) - init_scores = fluid.layers.data( - name="init_scores", shape=[1], dtype="float32", lod_level=2) + init_ids = fluid.data( + name="init_ids", shape=[None, 1], dtype="int64", lod_level=2) + init_scores = fluid.data( + name="init_scores", shape=[None, 1], dtype="float32", lod_level=2) fluid.layers.array_write(init_ids, array=ids_array, i=counter) fluid.layers.array_write(init_scores, array=scores_array, i=counter) @@ -349,11 +352,13 @@ def attention_infer(images, num_classes, use_cudnn=True): def attention_eval(data_shape, num_classes, use_cudnn=True): - images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') - label_in = fluid.layers.data( - name='label_in', shape=[1], dtype='int32', lod_level=1) - label_out = fluid.layers.data( - name='label_out', shape=[1], dtype='int32', lod_level=1) + if len(list(data_shape)) == 3: + data_shape = [None] + data_shape + images = fluid.data(name='pixel', shape=data_shape, dtype='float32') + label_in = fluid.data( + name='label_in', shape=[None, 1], dtype='int32', lod_level=1) + label_out = fluid.data( + name='label_out', shape=[None, 1], dtype='int32', lod_level=1) label_out = fluid.layers.cast(x=label_out, dtype='int64') label_in = fluid.layers.cast(x=label_in, dtype='int64') diff --git a/PaddleCV/ocr_recognition/crnn_ctc_model.py b/PaddleCV/ocr_recognition/crnn_ctc_model.py index 7650478a..55db15ca 100755 --- a/PaddleCV/ocr_recognition/crnn_ctc_model.py +++ b/PaddleCV/ocr_recognition/crnn_ctc_model.py @@ -188,10 +188,11 @@ def ctc_train_net(args, data_shape, num_classes): MOMENTUM = args.momentum learning_rate_decay = None regularizer = fluid.regularizer.L2Decay(L2_RATE) - - images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='int32', lod_level=1) + if len(list(data_shape)) == 3: + data_shape = [None] + list(data_shape) + images = fluid.data(name='pixel', shape=data_shape, dtype='float32') + label = fluid.data( + name='label', shape=[None, 1], dtype='int32', lod_level=1) fc_out = encoder_net( images, num_classes, @@ -231,9 +232,11 @@ def ctc_infer(images, num_classes, use_cudnn=True): def ctc_eval(data_shape, num_classes, use_cudnn=True): - images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='int32', lod_level=1) + if len(list(data_shape)) == 3: + data_shape = [None] + list(data_shape) + images = fluid.data(name='pixel', shape=data_shape, dtype='float32') + label = fluid.data( + name='label', shape=[None, 1], dtype='int32', lod_level=1) fc_out = encoder_net(images, num_classes, is_test=True, use_cudnn=use_cudnn) decoded_out = fluid.layers.ctc_greedy_decoder( input=fc_out, blank=num_classes) diff --git a/PaddleCV/ocr_recognition/data_reader.py b/PaddleCV/ocr_recognition/data_reader.py index 6e42e2af..3db5c106 100644 --- a/PaddleCV/ocr_recognition/data_reader.py +++ b/PaddleCV/ocr_recognition/data_reader.py @@ -32,7 +32,8 @@ except NameError: SOS = 0 EOS = 1 NUM_CLASSES = 95 -DATA_SHAPE = [1, 48, 512] +IMG_WIDTH = 384 +DATA_SHAPE = [1, 48, IMG_WIDTH] DATA_MD5 = "7256b1d5420d8c3e74815196e58cdad5" DATA_URL = "http://paddle-ocr-data.bj.bcebos.com/data.tar.gz" diff --git a/PaddleCV/ocr_recognition/eval.py b/PaddleCV/ocr_recognition/eval.py index c139331e..b1b288bf 100644 --- a/PaddleCV/ocr_recognition/eval.py +++ b/PaddleCV/ocr_recognition/eval.py @@ -64,11 +64,11 @@ def evaluate(args): # load init model model_dir = args.model_path - model_file_name = None - if not os.path.isdir(args.model_path): - model_dir = os.path.dirname(args.model_path) - model_file_name = os.path.basename(args.model_path) - fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name) + if os.path.isdir(args.model_path): + raise Exception("{} should not be a directory".format(args.model_path)) + fluid.load(program=fluid.default_main_program(), + model_path=model_dir, + executor=exe) print("Init model from: %s." % args.model_path) evaluator.reset(exe) diff --git a/PaddleCV/ocr_recognition/infer.py b/PaddleCV/ocr_recognition/infer.py index 7b790b52..f9843ca8 100755 --- a/PaddleCV/ocr_recognition/infer.py +++ b/PaddleCV/ocr_recognition/infer.py @@ -54,7 +54,9 @@ def inference(args): num_classes = data_reader.num_classes() data_shape = data_reader.data_shape() # define network - images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') + if len(list(data_shape)) == 3: + data_shape = [None] + list(data_shape) + images = fluid.data(name='pixel', shape=data_shape, dtype='float32') ids = infer(images, num_classes, use_cudnn=True if args.use_gpu else False) # data reader infer_reader = data_reader.inference( @@ -82,11 +84,10 @@ def inference(args): # load init model model_dir = args.model_path - model_file_name = None - if not os.path.isdir(args.model_path): - model_dir = os.path.dirname(args.model_path) - model_file_name = os.path.basename(args.model_path) - fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name) + fluid.load( + program=fluid.default_main_program(), + model_path=model_dir, + executor=exe) print("Init model from: %s." % args.model_path) batch_times = [] diff --git a/PaddleCV/ocr_recognition/train.py b/PaddleCV/ocr_recognition/train.py index 3baf0535..2f09d476 100755 --- a/PaddleCV/ocr_recognition/train.py +++ b/PaddleCV/ocr_recognition/train.py @@ -106,11 +106,7 @@ def train(args): # load init model if args.init_model is not None: model_dir = args.init_model - model_file_name = None - if not os.path.isdir(args.init_model): - model_dir = os.path.dirname(args.init_model) - model_file_name = os.path.basename(args.init_model) - fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name) + fluid.load(fluid.default_main_program(), model_dir) print("Init model from: %s." % args.init_model) train_exe = exe @@ -148,8 +144,8 @@ def train(args): def save_model(args, exe, iter_num): filename = "model_%05d" % iter_num - fluid.io.save_params( - exe, dirname=args.save_model_dir, filename=filename) + fluid.save(fluid.default_main_program(), + os.path.join(args.save_model_dir, filename)) print("Saved model to: %s/%s." % (args.save_model_dir, filename)) iter_num = 0 -- GitLab