fix py3 and win bugs

39aef4f6 · root · a1344ac1 · 39aef4f6 · 39aef4f6 · 39aef4f6
隐藏空白更改
内联 · 并排

Showing 3 changed files with 15 additions and 11 deletions

.gitignore .gitignore +4 -0

run.sh run.sh +2 -2

utils/tokenization.py utils/tokenization.py +9 -9

未找到文件。
--- a/.gitignore
+++ b/.gitignore
+*.pyc
+__pycache__
+pretrain_model
+output_model
--- a/run.sh
+++ b/run.sh
@@ -4,7 +4,7 @@
 export FLAGS_sync_nccl_allreduce=0
 export FLAGS_eager_delete_tensor_gb=1
-export CUDA_VISIBLE_DEVICES=0
+export CUDA_VISIBLE_DEVICES=2
 if [[ ! -d pretrain_model/bert ]]; then
    bash download_pretrain.sh bert
@@ -14,5 +14,5 @@ if [[ ! -d pretrain_model/ernie ]]; then
    bash download_pretrain.sh ernie
 fi
-python -u mtl_run.py
+python3.5 -u mtl_run.py
--- a/utils/tokenization.py
+++ b/utils/tokenization.py
@@ -68,15 +68,15 @@ def printable_text(text):
 def load_vocab(vocab_file):
    """Loads a vocabulary file into a dictionary."""
    vocab = collections.OrderedDict()
-    fin = io.open(vocab_file, encoding="utf8")
+    with io.open(vocab_file, encoding="utf8") as fin:
-    for num, line in enumerate(fin):
+        for num, line in enumerate(fin):
-        items = convert_to_unicode(line.strip()).split("\t")
+            items = convert_to_unicode(line.strip()).split("\t")
-        if len(items) > 2:
+            if len(items) > 2:
-            break
+                break
-        token = items[0]
+            token = items[0]
-        index = items[1] if len(items) == 2 else num
+            index = items[1] if len(items) == 2 else num
-        token = token.strip()
+            token = token.strip()
-        vocab[token] = int(index)
+            vocab[token] = int(index)
    return vocab