Merge pull request #33 from 123malin/bug_fix

bug fix for gradient_scale_strategy

Merge pull request #33 from 123malin/bug_fix
bug fix for gradient_scale_strategy
74625fc4 · wuzhihua · GitHub · 59367805 · d1385330 · 74625fc4
Hide whitespace changes
Inline | Side-by-side

Showing 1 changed file with 13 additions and 2 deletions

core/trainers/single_trainer.py (+13 -2)

No files found.
--- a/core/trainers/single_trainer.py
+++ b/core/trainers/single_trainer.py
@@ -248,8 +248,19 @@ class SingleTrainer(TranspileTrainer):
        _exe_strategy = fluid.ExecutionStrategy()
        # 0: kCoeffNumDevice; 1: One; 2: Customized
-        _build_strategy.gradient_scale_strategy = model_dict.get(
+        _gradient_scale_strategy = model_dict.get("gradient_scale_strategy", 0)
-            "gradient_scale_strategy", 0)
+        if _gradient_scale_strategy == 0:
+            gradient_scale_strategy = fluid.BuildStrategy.GradientScaleStrategy.CoeffNumDevice
+        elif _gradient_scale_strategy == 1:
+            gradient_scale_strategy = fluid.BuildStrategy.GradientScaleStrategy.One
+        elif _gradient_scale_strategy == 2:
+            gradient_scale_strategy = fluid.BuildStrategy.GradientScaleStrategy.Customized
+        else:
+            raise ValueError(
+                "Unsurpported config. gradient_scale_strategy must be one of [0, 1, 2]."
+            )
+        _build_strategy.gradient_scale_strategy = gradient_scale_strategy
        if "thread_num" in model_dict and model_dict["thread_num"] > 1:
            _build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
            _exe_strategy.num_threads = model_dict["thread_num"]