diff --git a/fluid/machine_reading_comprehesion/DuReader/README.md b/fluid/machine_reading_comprehesion/README.md
similarity index 100%
rename from fluid/machine_reading_comprehesion/DuReader/README.md
rename to fluid/machine_reading_comprehesion/README.md
diff --git a/fluid/machine_reading_comprehesion/DuReader/args.py b/fluid/machine_reading_comprehesion/args.py
similarity index 71%
rename from fluid/machine_reading_comprehesion/DuReader/args.py
rename to fluid/machine_reading_comprehesion/args.py
index 96422f4460cf0570d410c145f36e5074c9fc1013..228375584eec4d9602bb77a853cfd61c4016e909 100644
--- a/fluid/machine_reading_comprehesion/DuReader/args.py
+++ b/fluid/machine_reading_comprehesion/args.py
@@ -26,16 +26,11 @@ def parse_args():
         '--prepare',
         action='store_true',
         help='create the directories, prepare the vocabulary and embeddings')
+    parser.add_argument('--train', action='store_true', help='train the model')
     parser.add_argument(
-        '--train',
-        action='store_true',
-        help='train the model')
-    parser.add_argument(
-        '--evaluate',
-        action='store_true',
-        help='evaluate the model on dev set')
+        '--evaluate', action='store_true', help='evaluate the model on dev set')
     parser.add_argument(
-        '--predict',
+        '--predict',
         action='store_true',
         help='predict the answers for test set with trained model')
     parser.add_argument(
@@ -96,55 +91,19 @@ def parse_args():
         default=1000,
         help="cal dev loss every n batches."
         "(default: %(default)d)")
-    parser.add_argument(
-        '--optim',
-        default='adam',
-        help='optimizer type')
-    parser.add_argument(
-        '--trainset',
-        nargs='+',
-        help='train dataset')
-    parser.add_argument(
-        '--devset',
-        nargs='+',
-        help='dev dataset')
-    parser.add_argument(
-        '--testset',
-        nargs='+',
-        help='test dataset')
-    parser.add_argument(
-        '--vocab_dir',
-        help='dict')
-    parser.add_argument(
-        '--max_p_num',
-        type=int,
-        default=5)
-    parser.add_argument(
-        '--max_a_len',
-        type=int,
-        default=200)
-    parser.add_argument(
-        '--max_p_len',
-        type=int,
-        default=500)
-    parser.add_argument(
-        '--max_q_len',
-        type=int,
-        default=9)
-    parser.add_argument(
-        '--doc_num',
-        type=int,
-        default=5)
-    parser.add_argument(
-        '--para_print',
-        action='store_true')
-    parser.add_argument(
-        '--drop_rate',
-        type=float, default=0.0)
-    parser.add_argument(
-        '--random_seed',
-        type=int,
-        default=123)
+    parser.add_argument('--optim', default='adam', help='optimizer type')
+    parser.add_argument('--trainset', nargs='+', help='train dataset')
+    parser.add_argument('--devset', nargs='+', help='dev dataset')
+    parser.add_argument('--testset', nargs='+', help='test dataset')
+    parser.add_argument('--vocab_dir', help='dict')
+    parser.add_argument('--max_p_num', type=int, default=5)
+    parser.add_argument('--max_a_len', type=int, default=200)
+    parser.add_argument('--max_p_len', type=int, default=500)
+    parser.add_argument('--max_q_len', type=int, default=9)
+    parser.add_argument('--doc_num', type=int, default=5)
+    parser.add_argument('--para_print', action='store_true')
+    parser.add_argument('--drop_rate', type=float, default=0.0)
+    parser.add_argument('--random_seed', type=int, default=123)
     parser.add_argument(
         '--log_path',
         help='path of the log file. If not set, logs are printed to console')
diff --git a/fluid/machine_reading_comprehesion/DuReader/data/download.sh b/fluid/machine_reading_comprehesion/data/download.sh
similarity index 100%
rename from fluid/machine_reading_comprehesion/DuReader/data/download.sh
rename to fluid/machine_reading_comprehesion/data/download.sh
diff --git a/fluid/machine_reading_comprehesion/DuReader/data/md5sum.txt b/fluid/machine_reading_comprehesion/data/md5sum.txt
similarity index 100%
rename from fluid/machine_reading_comprehesion/DuReader/data/md5sum.txt
rename to fluid/machine_reading_comprehesion/data/md5sum.txt
diff --git a/fluid/machine_reading_comprehesion/DuReader/dataset.py b/fluid/machine_reading_comprehesion/dataset.py
similarity index 100%
rename from fluid/machine_reading_comprehesion/DuReader/dataset.py
rename to fluid/machine_reading_comprehesion/dataset.py
diff --git a/fluid/machine_reading_comprehesion/DuReader/rc_model.py b/fluid/machine_reading_comprehesion/rc_model.py
similarity index 100%
rename from fluid/machine_reading_comprehesion/DuReader/rc_model.py
rename to fluid/machine_reading_comprehesion/rc_model.py
diff --git a/fluid/machine_reading_comprehesion/DuReader/run.py b/fluid/machine_reading_comprehesion/run.py
similarity index 100%
rename from fluid/machine_reading_comprehesion/DuReader/run.py
rename to fluid/machine_reading_comprehesion/run.py
diff --git a/fluid/machine_reading_comprehesion/DuReader/run.sh b/fluid/machine_reading_comprehesion/run.sh
similarity index 100%
rename from fluid/machine_reading_comprehesion/DuReader/run.sh
rename to fluid/machine_reading_comprehesion/run.sh
diff --git a/fluid/machine_reading_comprehesion/DuReader/utils/__init__.py b/fluid/machine_reading_comprehesion/utils/__init__.py
similarity index 100%
rename from fluid/machine_reading_comprehesion/DuReader/utils/__init__.py
rename to fluid/machine_reading_comprehesion/utils/__init__.py
diff --git a/fluid/machine_reading_comprehesion/DuReader/utils/download_thirdparty.sh b/fluid/machine_reading_comprehesion/utils/download_thirdparty.sh
similarity index 100%
rename from fluid/machine_reading_comprehesion/DuReader/utils/download_thirdparty.sh
rename to fluid/machine_reading_comprehesion/utils/download_thirdparty.sh
diff --git a/fluid/machine_reading_comprehesion/DuReader/utils/dureader_eval.py b/fluid/machine_reading_comprehesion/utils/dureader_eval.py
similarity index 100%
rename from fluid/machine_reading_comprehesion/DuReader/utils/dureader_eval.py
rename to fluid/machine_reading_comprehesion/utils/dureader_eval.py
diff --git a/fluid/machine_reading_comprehesion/DuReader/utils/get_vocab.py b/fluid/machine_reading_comprehesion/utils/get_vocab.py
similarity index 100%
rename from fluid/machine_reading_comprehesion/DuReader/utils/get_vocab.py
rename to fluid/machine_reading_comprehesion/utils/get_vocab.py
diff --git a/fluid/machine_reading_comprehesion/DuReader/utils/marco_tokenize_data.py b/fluid/machine_reading_comprehesion/utils/marco_tokenize_data.py
similarity index 100%
rename from fluid/machine_reading_comprehesion/DuReader/utils/marco_tokenize_data.py
rename to fluid/machine_reading_comprehesion/utils/marco_tokenize_data.py
diff --git a/fluid/machine_reading_comprehesion/DuReader/utils/marcov1_to_dureader.py b/fluid/machine_reading_comprehesion/utils/marcov1_to_dureader.py
similarity index 100%
rename from fluid/machine_reading_comprehesion/DuReader/utils/marcov1_to_dureader.py
rename to fluid/machine_reading_comprehesion/utils/marcov1_to_dureader.py
diff --git a/fluid/machine_reading_comprehesion/DuReader/utils/marcov2_to_v1_tojsonl.py b/fluid/machine_reading_comprehesion/utils/marcov2_to_v1_tojsonl.py
similarity index 100%
rename from fluid/machine_reading_comprehesion/DuReader/utils/marcov2_to_v1_tojsonl.py
rename to fluid/machine_reading_comprehesion/utils/marcov2_to_v1_tojsonl.py
diff --git a/fluid/machine_reading_comprehesion/DuReader/utils/preprocess.py b/fluid/machine_reading_comprehesion/utils/preprocess.py
similarity index 100%
rename from fluid/machine_reading_comprehesion/DuReader/utils/preprocess.py
rename to fluid/machine_reading_comprehesion/utils/preprocess.py
diff --git a/fluid/machine_reading_comprehesion/DuReader/utils/run_marco2dureader_preprocess.sh b/fluid/machine_reading_comprehesion/utils/run_marco2dureader_preprocess.sh
similarity index 100%
rename from fluid/machine_reading_comprehesion/DuReader/utils/run_marco2dureader_preprocess.sh
rename to fluid/machine_reading_comprehesion/utils/run_marco2dureader_preprocess.sh
diff --git a/fluid/machine_reading_comprehesion/DuReader/vocab.py b/fluid/machine_reading_comprehesion/vocab.py
similarity index 98%
rename from fluid/machine_reading_comprehesion/DuReader/vocab.py
rename to fluid/machine_reading_comprehesion/vocab.py
index 4b66dffa791698ea5194618a2ab755c810b2c8de..14b608052132cc5c6f46810778511bc9a6a6915b 100644
--- a/fluid/machine_reading_comprehesion/DuReader/vocab.py
+++ b/fluid/machine_reading_comprehesion/vocab.py
@@ -25,6 +25,7 @@ class Vocab(object):
     """
     Implements a vocabulary to store the tokens in the data, with their corresponding embeddings.
     """
+
     def __init__(self, filename=None, initial_tokens=None, lower=False):
         self.id2token = {}
         self.token2id = {}
@@ -117,7 +118,9 @@ class Vocab(object):
         Args:
             min_cnt: tokens with frequency less than min_cnt is filtered
         """
-        filtered_tokens = [token for token in self.token2id if self.token_cnt[token] >= min_cnt]
+        filtered_tokens = [
+            token for token in self.token2id if self.token_cnt[token] >= min_cnt
+        ]
         # rebuild the token x id map
        self.token2id = {}
         self.id2token = {}
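
Note: the patch above only reformats the add_argument calls onto single lines; parsing behavior is unchanged. A minimal standalone sketch of that equivalence, not part of the patch (the ArgumentParser description string and the subset of flags shown here are illustrative; flag names and defaults mirror args.py above):

    import argparse

    def parse_args():
        # Single-line add_argument calls parse identically to the multi-line form;
        # only the source formatting differs.
        parser = argparse.ArgumentParser(description='illustrative DuReader-style flags')
        parser.add_argument('--train', action='store_true', help='train the model')
        parser.add_argument('--optim', default='adam', help='optimizer type')
        parser.add_argument('--max_p_len', type=int, default=500)
        parser.add_argument('--drop_rate', type=float, default=0.0)
        parser.add_argument('--random_seed', type=int, default=123)
        return parser.parse_args()

    if __name__ == '__main__':
        # e.g. `python demo.py --train --optim sgd` sets train=True, optim='sgd'
        print(parse_args())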