""" util tools """ from __future__ import print_function import os import sys import numpy as np import paddle.fluid as fluid def str2bool(v): """ argparse does not support True or False in python """ return v.lower() in ("true", "t", "1") class ArgumentGroup(object): """ Put arguments to one group """ def __init__(self, parser, title, des): """none""" self._group = parser.add_argument_group(title=title, description=des) def add_arg(self, name, type, default, help, **kwargs): """ Add argument """ type = str2bool if type == bool else type self._group.add_argument( "--" + name, default=default, type=type, help=help + ' Default: %(default)s.', **kwargs) def print_arguments(args): """none""" print('----------- Configuration Arguments -----------') for arg, value in sorted(vars(args).items()): print('%s: %s' % (arg, value)) print('------------------------------------------------') def to_str(string, encoding="utf-8"): """convert to str for print""" if sys.version_info.major == 3: if isinstance(string, bytes): return string.decode(encoding) elif sys.version_info.major == 2: if isinstance(string, unicode): return string.encode(encoding) return string def to_lodtensor(data, place): """ Convert data in list into lodtensor. """ seq_lens = [len(seq) for seq in data] cur_len = 0 lod = [cur_len] for l in seq_lens: cur_len += l lod.append(cur_len) flattened_data = np.concatenate(data, axis=0).astype("int64") flattened_data = flattened_data.reshape([len(flattened_data), 1]) res = fluid.LoDTensor() res.set(flattened_data, place) res.set_lod([lod]) return res def parse_result(words, crf_decode, dataset): """ parse result """ offset_list = (crf_decode.lod())[0] words = np.array(words) crf_decode = np.array(crf_decode) batch_size = len(offset_list) - 1 batch_out_str = [] for sent_index in range(batch_size): sent_out_str = "" sent_len = offset_list[sent_index + 1] - offset_list[sent_index] last_word = "" last_tag = "" for tag_index in range(sent_len): # iterate every word in sent index = tag_index + offset_list[sent_index] cur_word_id = str(words[index][0]) cur_tag_id = str(crf_decode[index][0]) cur_word = dataset.id2word_dict[cur_word_id] cur_tag = dataset.id2label_dict[cur_tag_id] if last_word == "": last_word = cur_word last_tag = cur_tag[:-2] elif cur_tag.endswith("-B") or cur_tag == "O": sent_out_str += last_word + u"/" + last_tag + u" " last_word = cur_word last_tag = cur_tag[:-2] elif cur_tag.endswith("-I"): last_word += cur_word else: raise ValueError("invalid tag: %s" % (cur_tag)) if cur_word != "": sent_out_str += last_word + u"/" + last_tag + u" " sent_out_str = to_str(sent_out_str.strip()) batch_out_str.append(sent_out_str) return batch_out_str def init_checkpoint(exe, init_checkpoint_path, main_program): """ Init CheckPoint """ assert os.path.exists( init_checkpoint_path), "[%s] cann't be found." % init_checkpoint_path def existed_persitables(var): """ If existed presitabels """ if not fluid.io.is_persistable(var): return False return os.path.exists(os.path.join(init_checkpoint_path, var.name)) fluid.io.load_vars( exe, init_checkpoint_path, main_program=main_program, predicate=existed_persitables) print("Load model from {}".format(init_checkpoint_path)) def init_pretraining_params(exe, pretraining_params_path, main_program, use_fp16=False): """load params of pretrained model, NOT including moment, learning_rate""" assert os.path.exists(pretraining_params_path ), "[%s] cann't be found." % pretraining_params_path def _existed_params(var): if not isinstance(var, fluid.framework.Parameter): return False return os.path.exists(os.path.join(pretraining_params_path, var.name)) fluid.io.load_vars( exe, pretraining_params_path, main_program=main_program, predicate=_existed_params) print("Load pretraining parameters from {}.".format( pretraining_params_path))