From 41351679720289a0c431546a2ef7d91e3dce5e77 Mon Sep 17 00:00:00 2001
From: JiabinYang <marsyang199376@gmail.com>
Date: Fri, 28 Dec 2018 05:41:54 +0000
Subject: [PATCH] refine readme and clean code

---
 fluid/PaddleRec/word2vec/README.cn.md     | 1 +
 fluid/PaddleRec/word2vec/README.md        | 3 ++-
 fluid/PaddleRec/word2vec/data/download.sh | 4 ++++
 fluid/PaddleRec/word2vec/infer.py         | 2 --
 4 files changed, 7 insertions(+), 3 deletions(-)
 create mode 100644 fluid/PaddleRec/word2vec/data/download.sh

diff --git a/fluid/PaddleRec/word2vec/README.cn.md b/fluid/PaddleRec/word2vec/README.cn.md
index 076b3eef..7ed9ddc3 100644
--- a/fluid/PaddleRec/word2vec/README.cn.md
+++ b/fluid/PaddleRec/word2vec/README.cn.md
@@ -25,6 +25,7 @@ cd data && ./download.sh && cd ..
 ```bash
 python preprocess.py --data_path ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled --dict_path data/1-billion_dict
 ```
+如果您想使用我们支持的第三方词汇表，请将--other_dict_path设置为存放该词汇表的目录，并设置--with_other_dict以启用它。
 
 ## 训练
 训练的命令行选项可以通过`python train.py -h`列出。
diff --git a/fluid/PaddleRec/word2vec/README.md b/fluid/PaddleRec/word2vec/README.md
index 1c5da2a3..01e0696a 100644
--- a/fluid/PaddleRec/word2vec/README.md
+++ b/fluid/PaddleRec/word2vec/README.md
@@ -31,7 +31,8 @@ Preprocess the training data to generate a word dict.
 ```bash
 python preprocess.py --data_path ./data/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled --dict_path data/1-billion_dict
 ```
-if you would like to use our supported third party vocab, please set
+If you would like to use one of our supported third-party vocabularies, set --other_dict_path to the directory where
+the vocabulary is saved, and turn on the --with_other_dict flag to use it.
 
 ## Train
 The command line options for training can be listed by `python train.py -h`.
diff --git a/fluid/PaddleRec/word2vec/data/download.sh b/fluid/PaddleRec/word2vec/data/download.sh
new file mode 100644
index 00000000..22cde6d9
--- /dev/null
+++ b/fluid/PaddleRec/word2vec/data/download.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+wget http://www.statmt.org/lm-benchmark/1-billion-word-language-modeling-benchmark-r13output.tar.gz
+tar -zxvf 1-billion-word-language-modeling-benchmark-r13output.tar.gz
diff --git a/fluid/PaddleRec/word2vec/infer.py b/fluid/PaddleRec/word2vec/infer.py
index 9ed42d1c..69844c20 100644
--- a/fluid/PaddleRec/word2vec/infer.py
+++ b/fluid/PaddleRec/word2vec/infer.py
@@ -2,8 +2,6 @@ import time
 import os
 import paddle.fluid as fluid
 import numpy as np
-from Queue import PriorityQueue
-import heapq
 import logging
 import argparse
 import preprocess
-- 
GitLab