diff --git a/PaddleNLP/language_representations_kit/BERT/README.md b/PaddleNLP/language_representations_kit/BERT/README.md
index db0e110409d76882ff8a8d68751f46cf9cfbb054..b79965967fa6a33d0ebf2784ace87e01b787761f 100644
--- a/PaddleNLP/language_representations_kit/BERT/README.md
+++ b/PaddleNLP/language_representations_kit/BERT/README.md
@@ -41,7 +41,8 @@
 - [**Fine-Tuning**: 预训练模型如何应用到特定 NLP 任务上](#nlp-任务的-fine-tuning)
   - [语句和句对分类任务](#语句和句对分类任务)
   - [阅读理解 SQuAD](#阅读理解-squad)
-- [**混合精度训练**: 利用混合精度加速训练](#混合精度训练)
+## 动态混合精度训练
+- [**动态混合精度训练**: 利用混合精度加速训练](#动态混合精度训练)
 - [**模型转换**: 如何将 BERT TensorFlow 模型转换为 Paddle Fluid 模型](#模型转换)
 - [**模型部署**: 多硬件环境模型部署支持](#模型部署)
   - [产出用于部署的 inference model](#保存-inference-model)
diff --git a/PaddleNLP/language_representations_kit/BERT/train.py b/PaddleNLP/language_representations_kit/BERT/train.py
index 52c8650ec3fccc946c67998a870b34c4c116b5f9..f176638c8295007d4efaccd1f3925221fd142484 100644
--- a/PaddleNLP/language_representations_kit/BERT/train.py
+++ b/PaddleNLP/language_representations_kit/BERT/train.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import os
 import time
+import sys
 import argparse
 import numpy as np
 import multiprocessing
@@ -321,6 +322,8 @@ def train(args):
     exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope
 
     build_strategy = fluid.BuildStrategy()
+    if sys.platform == "win32" and nccl2_num_trainers > 1:
+        raise ValueError("Windows platform doesn't support distributed training!")
     build_strategy.num_trainers = nccl2_num_trainers
     build_strategy.trainer_id = nccl2_trainer_id
     # use_ngraph is for CPU only, please refer to README_ngraph.md for details