提交 d480a0d1 编写于 作者: J JiabinYang

fix hs problem and polish code

上级 b7a4772f
...@@ -31,8 +31,9 @@ python preprocess.py --data_path ./data/1-billion-word-language-modeling-benchma ...@@ -31,8 +31,9 @@ python preprocess.py --data_path ./data/1-billion-word-language-modeling-benchma
The command line options for training can be listed by `python train.py -h`. The command line options for training can be listed by `python train.py -h`.
### Local Train: ### Local Train:
We set CPU_NUM=1 as the default CPU_NUM for execution.
```bash ```bash
export CPU_NUM=1 export CPU_NUM=1 && \
python train.py \ python train.py \
--train_data_path ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled \ --train_data_path ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled \
--dict_path data/1-billion_dict \ --dict_path data/1-billion_dict \
......
...@@ -113,6 +113,13 @@ class PQ_Entry(object): ...@@ -113,6 +113,13 @@ class PQ_Entry(object):
def topK(k, emb, test_emb): def topK(k, emb, test_emb):
pq = PriorityQueue(k + 1) pq = PriorityQueue(k + 1)
while not pq.empty():
try:
pq.get(False)
except Empty:
continue
pq.task_done()
if len(emb) <= k: if len(emb) <= k:
for i in range(len(emb)): for i in range(len(emb)):
x = cosine_similarity([emb[i]], [test_emb]) x = cosine_similarity([emb[i]], [test_emb])
...@@ -181,5 +188,7 @@ if __name__ == '__main__': ...@@ -181,5 +188,7 @@ if __name__ == '__main__':
# while setting infer_once please specify the dir to models file with --model_output_dir # while setting infer_once please specify the dir to models file with --model_output_dir
if args.infer_once: if args.infer_once:
infer_once(args) infer_once(args)
if args.infer_during_train: elif args.infer_during_train:
infer_during_train(args) infer_during_train(args)
else:
pass
...@@ -125,10 +125,10 @@ class Word2VecReader(object): ...@@ -125,10 +125,10 @@ class Word2VecReader(object):
for context_id in context_word_ids: for context_id in context_word_ids:
yield [target_id], [context_id], [ yield [target_id], [context_id], [
self.word_to_code[self.id_to_word[ self.word_to_code[self.id_to_word[
context_id]] target_id]]
], [ ], [
self.word_to_path[self.id_to_word[ self.word_to_path[self.id_to_word[
context_id]] target_id]]
] ]
else: else:
pass pass
......
...@@ -257,9 +257,9 @@ def train(args): ...@@ -257,9 +257,9 @@ def train(args):
optimizer = None optimizer = None
if args.with_Adam: if args.with_Adam:
optimizer = fluid.optimizer.Adam(learning_rate=1e-3) optimizer = fluid.optimizer.Adam(learning_rate=1e-4)
else: else:
optimizer = fluid.optimizer.SGD(learning_rate=1e-3) optimizer = fluid.optimizer.SGD(learning_rate=1e-4)
optimizer.minimize(loss) optimizer.minimize(loss)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册