From ade18e13bea1157554fd7070c45ed38ecd199463 Mon Sep 17 00:00:00 2001 From: dyning Date: Tue, 2 Jun 2020 15:53:02 +0800 Subject: [PATCH] add score in rec_infer --- ppocr/data/rec/dataset_traversal.py | 1 + ppocr/modeling/architectures/rec_model.py | 8 +++-- ppocr/modeling/heads/rec_attention_head.py | 13 ++++++--- tools/infer_rec.py | 34 ++++++++++++++-------- 4 files changed, 38 insertions(+), 18 deletions(-) diff --git a/ppocr/data/rec/dataset_traversal.py b/ppocr/data/rec/dataset_traversal.py index f60b9fe3..92aa21ee 100755 --- a/ppocr/data/rec/dataset_traversal.py +++ b/ppocr/data/rec/dataset_traversal.py @@ -48,6 +48,7 @@ class LMDBReader(object): elif params['mode'] == "test": self.batch_size = 1 self.infer_img = params["infer_img"] + def load_hierarchical_lmdb_dataset(self): lmdb_sets = {} dataset_idx = 0 diff --git a/ppocr/modeling/architectures/rec_model.py b/ppocr/modeling/architectures/rec_model.py index d88c620b..fdc0a641 100755 --- a/ppocr/modeling/architectures/rec_model.py +++ b/ppocr/modeling/architectures/rec_model.py @@ -110,7 +110,11 @@ class RecModel(object): return loader, outputs elif mode == "export": predict = predicts['predict'] - predict = fluid.layers.softmax(predict) + if self.loss_type == "ctc": + predict = fluid.layers.softmax(predict) return [image, {'decoded_out': decoded_out, 'predicts': predict}] else: - return loader, {'decoded_out': decoded_out} + predict = predicts['predict'] + if self.loss_type == "ctc": + predict = fluid.layers.softmax(predict) + return loader, {'decoded_out': decoded_out, 'predicts': predict} diff --git a/ppocr/modeling/heads/rec_attention_head.py b/ppocr/modeling/heads/rec_attention_head.py index 8f5b4cc4..66c8f300 100755 --- a/ppocr/modeling/heads/rec_attention_head.py +++ b/ppocr/modeling/heads/rec_attention_head.py @@ -123,6 +123,8 @@ class AttentionPredict(object): full_ids = fluid.layers.fill_constant_batch_size_like( input=init_state, shape=[-1, 1], dtype='int64', value=1) + full_scores = fluid.layers.fill_constant_batch_size_like( + input=init_state, shape=[-1, 1], dtype='float32', value=1) cond = layers.less_than(x=counter, y=array_len) while_op = layers.While(cond=cond) @@ -171,6 +173,9 @@ class AttentionPredict(object): new_ids = fluid.layers.concat([full_ids, topk_indices], axis=1) fluid.layers.assign(new_ids, full_ids) + new_scores = fluid.layers.concat([full_scores, topk_scores], axis=1) + fluid.layers.assign(new_scores, full_scores) + layers.increment(x=counter, value=1, in_place=True) # update the memories @@ -184,7 +189,7 @@ class AttentionPredict(object): length_cond = layers.less_than(x=counter, y=array_len) finish_cond = layers.logical_not(layers.is_empty(x=topk_indices)) layers.logical_and(x=length_cond, y=finish_cond, out=cond) - return full_ids + return full_ids, full_scores def __call__(self, inputs, labels=None, mode=None): encoder_features = self.encoder(inputs) @@ -223,10 +228,10 @@ class AttentionPredict(object): decoder_size, char_num) _, decoded_out = layers.topk(input=predict, k=1) decoded_out = layers.lod_reset(decoded_out, y=label_out) - predicts = {'predict': predict, 'decoded_out': decoded_out} + predicts = {'predict':predict, 'decoded_out':decoded_out} else: - ids = self.gru_attention_infer( + ids, predict = self.gru_attention_infer( decoder_boot, self.max_length, char_num, word_vector_dim, encoded_vector, encoded_proj, decoder_size) - predicts = {'decoded_out': ids} + predicts = {'predict':predict, 'decoded_out':ids} return predicts diff --git a/tools/infer_rec.py b/tools/infer_rec.py index 25bae1ca..51a603b6 100755 --- a/tools/infer_rec.py +++ b/tools/infer_rec.py @@ -79,34 +79,44 @@ def main(): blobs = reader_main(config, 'test')() infer_img = config['TestReader']['infer_img'] + loss_type = config['Global']['loss_type'] infer_list = get_image_file_list(infer_img) max_img_num = len(infer_list) if len(infer_list) == 0: logger.info("Can not find img in infer_img dir.") for i in range(max_img_num): - print("infer_img:",infer_list[i]) + logger.info("infer_img:%s" % infer_list[i]) img = next(blobs) predict = exe.run(program=eval_prog, feed={"image": img}, fetch_list=fetch_varname_list, return_numpy=False) - - preds = np.array(predict[0]) - if preds.shape[1] == 1: + if loss_type == "ctc": + preds = np.array(predict[0]) preds = preds.reshape(-1) preds_lod = predict[0].lod()[0] preds_text = char_ops.decode(preds) - else: + probs = np.array(predict[1]) + ind = np.argmax(probs, axis=1) + blank = probs.shape[1] + valid_ind = np.where(ind != (blank - 1))[0] + score = np.mean(probs[valid_ind, ind[valid_ind]]) + elif loss_type == "attention": + preds = np.array(predict[0]) + probs = np.array(predict[1]) end_pos = np.where(preds[0, :] == 1)[0] if len(end_pos) <= 1: - preds_text = preds[0, 1:] + preds = preds[0, 1:] + score = np.mean(probs[0, 1:]) else: - preds_text = preds[0, 1:end_pos[1]] - preds_text = preds_text.reshape(-1) - preds_text = char_ops.decode(preds_text) - - print("\t index:",preds) - print("\t word :",preds_text) + preds = preds[0, 1:end_pos[1]] + score = np.mean(probs[0, 1:end_pos[1]]) + preds = preds.reshape(-1) + preds_text = char_ops.decode(preds) + + print("\t index:", preds) + print("\t word :", preds_text) + print("\t score :", score) # save for inference model target_var = [] -- GitLab