# coding=utf-8

# -- config : data --
train_file = 'data/chinese.train.txt'
test_file = 'data/chinese.test.txt'
vocab_file = 'data/vocab_cn.txt'  # the file to save the vocab
build_vocab_method = 'fixed_size'  # 'frequency' or 'fixed_size'
vocab_max_size = 3000  # used when build_vocab_method = 'fixed_size'
unk_threshold = 1  # used when build_vocab_method = 'frequency'
min_sentence_length = 3
max_sentence_length = 60

# -- config : train --
use_which_model = 'ngram'  # must be 'rnn' or 'ngram'
use_gpu = False  # whether to use the GPU
trainer_count = 1  # number of trainers


class Config_rnn(object):
    """
    config for the RNN language model
    """
    rnn_type = 'gru'  # or 'lstm'
    emb_dim = 200
    hidden_size = 200
    num_layer = 2
    num_passes = 2
    batch_size = 32
    model_file_name_prefix = 'lm_' + rnn_type + '_params_pass_'


class Config_ngram(object):
    """
    config for the N-Gram language model
    """
    emb_dim = 200
    hidden_size = 200
    num_layer = 2
    N = 5
    num_passes = 2
    batch_size = 32
    model_file_name_prefix = 'lm_ngram_pass_'


# -- config : infer --
input_file = 'data/input.txt'  # each line of this file is a sentence prefix
output_file = 'data/output.txt'  # the file to save the results
num_words = 10  # the maximum number of words to generate
beam_size = 5  # beam width: the number of predicted sentences kept per prefix
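
# -- illustration : building the vocab (not part of the config) --
# A minimal sketch of how the two build_vocab_method settings above might be
# applied. The helper name build_vocab and the corpus format (pre-segmented
# text, one whitespace-tokenized sentence per line) are assumptions, not
# something this config file defines.
import collections


def build_vocab(corpus_file=train_file, save_to=vocab_file):
    counter = collections.Counter()
    with open(corpus_file, encoding='utf-8') as f:
        for line in f:
            counter.update(line.split())
    if build_vocab_method == 'fixed_size':
        # keep only the vocab_max_size most frequent words
        words = [w for w, _ in counter.most_common(vocab_max_size)]
    else:  # 'frequency'
        # keep every word seen more than unk_threshold times
        words = [w for w, c in counter.items() if c > unk_threshold]
    with open(save_to, 'w', encoding='utf-8') as f:
        for w in words:
            f.write(w + '\n')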
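
# -- illustration : how the infer settings are used (not part of the config) --
# A rough sketch of the roles of num_words and beam_size during generation.
# next_word_probs is a hypothetical stand-in for the trained model: given a
# prefix (a list of words), it returns (word, log_prob) candidate pairs. The
# real inference code may differ; this only shows what the two knobs control.


def beam_search(prefix, next_word_probs):
    # each hypothesis is (accumulated log-probability, word list)
    beams = [(0.0, list(prefix))]
    for _ in range(num_words):  # extend each prefix by at most num_words words
        candidates = []
        for score, words in beams:
            for word, logp in next_word_probs(words):
                candidates.append((score + logp, words + [word]))
        # keep only the beam_size highest-scoring hypotheses
        beams = sorted(candidates, key=lambda c: c[0], reverse=True)[:beam_size]
    return beams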