+ Electra([ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators](https://arxiv.org/abs/2003.10555)), supporting `chinese-electra-discriminator-small` with hidden_size=256 and
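A minimal loading sketch for the new checkpoint, assuming PaddleNLP's `from_pretrained`-style API and that the registered model name matches the string quoted above (both are assumptions; verify the exact identifier against the model zoo):

```
import paddle
from paddlenlp.transformers import ElectraModel, ElectraTokenizer

# Model name assumed to match the string quoted above; check the model zoo.
MODEL_NAME = "chinese-electra-discriminator-small"

tokenizer = ElectraTokenizer.from_pretrained(MODEL_NAME)
model = ElectraModel.from_pretrained(MODEL_NAME)

encoded = tokenizer("今天天气不错")  # dict with input_ids / token_type_ids
input_ids = paddle.to_tensor([encoded["input_ids"]])
sequence_output = model(input_ids)  # hidden states; last dim = hidden_size (256)
```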
- [Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks](https://arxiv.org/abs/1908.10084) (EMNLP 2019)
- [Making Monolingual Sentence Embeddings Multilingual using Knowledge Distillation](https://arxiv.org/abs/2004.09813) (EMNLP 2020)
- [Augmented SBERT: Data Augmentation Method for Improving Bi-Encoders for Pairwise Sentence Scoring Tasks](https://arxiv.org/abs/2010.08240) (arXiv 2020)
```
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```
parser.add_argument("--use_gpu",type=eval,default=False,help="Whether use GPU for training, input should be True or False")
parser.add_argument("--batch_size",type=int,default=64,help="Total examples' number of a batch for training.")
parser.add_argument("--vocab_path",type=str,default="./data/term2id.dict",help="The path to vocabulary.")
parser.add_argument('--network_name',type=str,default="lstm",help="Which network you would like to choose bow, lstm, bilstm, gru, bigru, rnn, birnn, bilstm_attn, cnn and textcnn?")
parser.add_argument('--network_name',type=str,default="lstm",help="Which network you would like to choose bow, cnn, lstm or gru ?")
parser.add_argument("--params_path",type=str,default='./chekpoints/final.pdparams',help="The path of model parameter to be loaded.")
```diff
@@ -26,13 +26,13 @@ from utils import load_vocab, generate_batch, convert_example
 # yapf: disable
 parser = argparse.ArgumentParser(__doc__)
-parser.add_argument("--epochs", type=int, default=3, help="Number of epochs for training.")
-parser.add_argument('--use_gpu', type=eval, default=True, help="Whether to use GPU for training; input should be True or False.")
+parser.add_argument("--epochs", type=int, default=10, help="Number of epochs for training.")
+parser.add_argument('--use_gpu', type=eval, default=False, help="Whether to use GPU for training; input should be True or False.")
 parser.add_argument("--lr", type=float, default=5e-4, help="Learning rate used to train.")
 parser.add_argument("--save_dir", type=str, default='checkpoints/', help="Directory to save model checkpoints.")
 parser.add_argument("--batch_size", type=int, default=64, help="Total number of examples per batch for training.")
-parser.add_argument("--vocab_path", type=str, default="./data/term2id.dict", help="The path to the vocabulary.")
-parser.add_argument('--network', type=str, default="cnn", help="Which network would you like to choose: bow, lstm, bilstm, gru, bigru, rnn, birnn, bilstm_attn or textcnn?")
+parser.add_argument("--vocab_path", type=str, default="./simnet_word_dict.txt", help="The path to the vocabulary.")
+parser.add_argument('--network', type=str, default="lstm", help="Which network would you like to choose: bow, cnn, lstm or gru?")
 parser.add_argument("--init_from_ckpt", type=str, default=None, help="The path of the checkpoint to be loaded.")
```