diff --git a/tests/model/Megatron_GPT2/ds_config_func_bs4.json b/tests/model/Megatron_GPT2/ds_config_func_bs4.json index 39e4c578c980ce6638a04c9919405a93f5eee63d..ce961201e412887ab791003c235fe2c1901e8d62 100644 --- a/tests/model/Megatron_GPT2/ds_config_func_bs4.json +++ b/tests/model/Megatron_GPT2/ds_config_func_bs4.json @@ -5,6 +5,7 @@ "zero_optimization": true, "optimizer": { "type": "Adam", + "legacy_fusion": false, "params": { "lr": 0.00015, "max_grad_norm": 1.0 diff --git a/tests/model/Megatron_GPT2/ds_config_func_bs8.json b/tests/model/Megatron_GPT2/ds_config_func_bs8.json index c2f157ea56e568febc4901f193669bd5a7296927..cbb1884aa33a7721581b7968a60fa40fe698b5e7 100644 --- a/tests/model/Megatron_GPT2/ds_config_func_bs8.json +++ b/tests/model/Megatron_GPT2/ds_config_func_bs8.json @@ -5,6 +5,7 @@ "zero_optimization": true, "optimizer": { "type": "Adam", + "legacy_fusion": false, "params": { "lr": 0.00015, "max_grad_norm": 1.0 diff --git a/tests/model/Megatron_GPT2/ds_config_func_scheduler.json b/tests/model/Megatron_GPT2/ds_config_func_scheduler.json index 34a1597851567ef96bfd55604ed59decdacc7a59..3e1d01169f7a1a770666d212b40e8196eb5c16b9 100644 --- a/tests/model/Megatron_GPT2/ds_config_func_scheduler.json +++ b/tests/model/Megatron_GPT2/ds_config_func_scheduler.json @@ -5,6 +5,7 @@ "zero_optimization": true, "optimizer": { "type": "Adam", + "legacy_fusion": false, "params": { "lr": 0.00015, "max_grad_norm": 1.0 diff --git a/tests/model/Megatron_GPT2/ds_config_perf_bs16.json b/tests/model/Megatron_GPT2/ds_config_perf_bs16.json index 9b2f79d8ff8ee7cd0e3609550b19146818d2bfdc..ee9f9664f4c9d691f961dea7886fe9a4d780c3fd 100644 --- a/tests/model/Megatron_GPT2/ds_config_perf_bs16.json +++ b/tests/model/Megatron_GPT2/ds_config_perf_bs16.json @@ -6,6 +6,7 @@ "disable_allgather": true, "optimizer": { "type": "Adam", + "legacy_fusion": false, "params": { "lr": 0.00015, "max_grad_norm": 1.0 diff --git a/tests/model/Megatron_GPT2/ds_config_perf_bs32.json b/tests/model/Megatron_GPT2/ds_config_perf_bs32.json index 26800165cde7134f25f743142c7e34ee31a5d2ec..15dc107b4798f6c6b07b67d0119e822a49596d58 100644 --- a/tests/model/Megatron_GPT2/ds_config_perf_bs32.json +++ b/tests/model/Megatron_GPT2/ds_config_perf_bs32.json @@ -6,6 +6,7 @@ "disable_allgather": true, "optimizer": { "type": "Adam", + "legacy_fusion": false, "params": { "lr": 0.00015, "max_grad_norm": 1.0 diff --git a/tests/model/Megatron_GPT2/ds_config_perf_bs8.json b/tests/model/Megatron_GPT2/ds_config_perf_bs8.json index 99a8f5ec15cf2309f306099cde08ef3bd5a2b74b..0cc5a347a6a1e65727a47555c0b0e828330f9b57 100644 --- a/tests/model/Megatron_GPT2/ds_config_perf_bs8.json +++ b/tests/model/Megatron_GPT2/ds_config_perf_bs8.json @@ -6,6 +6,7 @@ "disable_allgather": true, "optimizer": { "type": "Adam", + "legacy_fusion": false, "params": { "lr": 0.00015, "max_grad_norm": 1.0