diff --git a/dygraph/bert/README.md b/dygraph/bert/README.md
index 53e7b0e20eee7011921e5e682b9a716556e327ec..8ff43b5e432e271e1524c98fa4be371214fdee97 100644
--- a/dygraph/bert/README.md
+++ b/dygraph/bert/README.md
@@ -16,7 +16,7 @@
 
 | Model | Layers | Hidden size | Heads |Parameters |
 | :------| :------: | :------: |:------: |:------: |
-| [BERT-Base, Uncased](https://baidu-nlp.bj.bcebos.com/DYGRAPH_models%2FBERT%2Fdata.tar.gz) | 12 | 768 |12 |110M |
+| [BERT-Base, Uncased](https://baidu-nlp.bj.bcebos.com/DYGRAPH_models/BERT/data.tar.gz) | 12 | 768 |12 |110M |
 
 每个压缩包都包含了模型配置文件 `bert_config.json`、参数文件夹 `params`、动态图参数文件夹`dygraph_params` 和词汇表 `vocab.txt`；
 
diff --git a/dygraph/bert/run_classifier_multi_gpu.sh b/dygraph/bert/run_classifier_multi_gpu.sh
index 041a1091a7ed95ba1a8df475af7b98a9108065e7..1e6ba61faba4bd72a9cfcb6ce5e5f42db19d0319 100755
--- a/dygraph/bert/run_classifier_multi_gpu.sh
+++ b/dygraph/bert/run_classifier_multi_gpu.sh
@@ -4,7 +4,7 @@ BERT_BASE_PATH="./data/pretrained_models/uncased_L-12_H-768_A-12/"
 TASK_NAME='MNLI'
 DATA_PATH="./data/glue_data/MNLI/"
 CKPT_PATH="./data/saved_model/mnli_models"
-GPU_TO_USE="0,1,2,3"
+GPU_TO_USE=0,1,2,3
 
 export CUDA_VISIBLE_DEVICES=$GPU_TO_USE
 
diff --git a/dygraph/bert/utils/init.py b/dygraph/bert/utils/init.py
index d823473d59f35ff256d97db108c128ca92a7c1fd..6b69d87e7d3a877cccedd7fd5f42788279ef1470 100644
--- a/dygraph/bert/utils/init.py
+++ b/dygraph/bert/utils/init.py
@@ -83,7 +83,13 @@ def init_pretraining_params(exe,
 
 def init_from_static_model(dir_path, cls_model, bert_config):
     def load_numpy_weight(file_name):
-        res = np.load(os.path.join(dir_path, file_name), allow_pickle=True)
+        if six.PY2:
+            res = np.load(os.path.join(dir_path, file_name), allow_pickle=True)
+        else:
+            res = np.load(
+                os.path.join(dir_path, file_name),
+                allow_pickle=True,
+                encoding='latin1')
         assert res is not None
         return res