import math

__all__ = ["TrainingConfig", "InferConfig"]


class CommonConfig(object):
    def __init__(self):
        # network size:
        # dimension of the question LSTM
        self.q_lstm_dim = 64
        # dimension of the attention layer
        self.latent_chain_dim = 64
        # dimension of the evidence LSTMs
        self.e_lstm_dim = 64
        # dimension of the qe.comm and ee.comm feature embeddings
        self.com_vec_dim = 2
        # dropout rate
        self.drop_rate = 0.05

        # CRF:
        # valid values are BIO and BIO2
        self.label_schema = "BIO2"

        # word embedding:
        # vocabulary file path
        self.word_dict_path = "data/embedding/wordvecs.vcb"
        # word embedding file path
        self.wordvecs_path = "data/embedding/wordvecs.txt"
        self.word_vec_dim = 64
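        # note: word_vec_dim should match the dimensionality of the vectors
        # stored in wordvecs_path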

        # saving model & logs:
        # dir for saving models
        self.model_save_dir = "models"

        # print training info every log_period batches
        self.log_period = 100
        # show parameter status every show_parameter_status_period batches
        self.show_parameter_status_period = 100

    @property
    def label_num(self):
        if self.label_schema == "BIO":
            return 3
        elif self.label_schema == "BIO2":
            return 4
        else:
            raise ValueError(
                "label_schema must be 'BIO' or 'BIO2', got %r" %
                self.label_schema)

    @property
    def default_init_std(self):
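        # likely a 1/sqrt(fan-in) initialization heuristic; the factor of 4
        # matches the four gate weight matrices of an LSTM cell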
        return 1 / math.sqrt(self.e_lstm_dim * 4)

    @property
    def default_l2_rate(self):
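        # note: batch_size is not defined on CommonConfig; it is set by the
        # concrete subclasses (TrainingConfig and InferConfig)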
        return 8e-4 * self.batch_size / 6

    @property
    def dict_dim(self):
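        # note: vocab is not set in this file; the caller is expected to
        # attach the loaded vocabulary (presumably from word_dict_path)
        # before reading dict_dim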
        return len(self.vocab)


class TrainingConfig(CommonConfig):
    def __init__(self):
        super(TrainingConfig, self).__init__()

        # data:
        # training data path
        self.train_data_path = "data/data/training.json.gz"

        # number of batches used in each pass
        self.batches_per_pass = 1000
        # number of passes to train
        self.num_passes = 25
        # batch size
        self.batch_size = 120

        # the ratio of negative samples used in training
        self.negative_sample_ratio = 0.2
        # the ratio of negative samples that contain the golden answer string
        self.hit_ans_negative_sample_ratio = 0.25

        # keep only the first B tag in the golden labels
        self.keep_first_b = False

        # whether to use a GPU to train the model
        self.use_gpu = False
        # number of trainer threads
        self.trainer_count = 1

        # random seeds:
        # data reader random seed; 0 means use a random seed
        self.seed = 0
        # PaddlePaddle random seed; 0 means use a random seed
        self.paddle_seed = 0

        # optimizer:
        self.learning_rate = 1e-3
        # RMSProp hyperparameters
        self.rho = 0.95
        self.epsilon = 1e-4
        # model averaging parameters
        self.average_window = 0.5
        self.max_average_window = 10000


class InferConfig(CommonConfig):
    def __init__(self):
        super(InferConfig, self).__init__()

        self.use_gpu = False
        self.trainer_count = 1
        self.batch_size = 120
        # placeholder, presumably filled with the loaded word embeddings at
        # inference time
        self.wordvecs = None
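

# Minimal usage sketch (illustrative only, not part of the original
# configuration). It assumes word_dict_path points to a plain-text
# vocabulary with one token per line; the original file does not show
# how vocab is actually loaded.
if __name__ == "__main__":
    config = TrainingConfig()
    # attach the vocabulary so that dict_dim becomes available
    with open(config.word_dict_path) as f:
        config.vocab = [line.strip() for line in f]
    print("dict_dim: %d" % config.dict_dim)
    print("label_num: %d" % config.label_num)
    print("default_init_std: %f" % config.default_init_std)
    print("default_l2_rate: %f" % config.default_l2_rate)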