未验证 提交 c61e23b4 编写于 作者: A Arash Ashari 提交者: GitHub

adding BingSquad e2e test (#214)

* adding BingSquad e2e test

* updating the draft test; bring final step under try section

* finalizing test for base DeepSpeed and DeepSpeed with ZeRO

* applying the comment (thanks Jeff); fixed formatting
上级 ccaa9901
import subprocess as sp
import datetime
import os
from math import isclose
import sys
import pytest
import json
import argparse
sys.path.append("../../../DeepSpeedExamples/BingBertSquad")
import evaluate as eval
# Host directory holding the SQuAD data, pretrained checkpoint, and eval set.
squad_dir = "/data/BingBertSquad"
# Location of the BingBertSquad example scripts, relative to this test file.
base_dir = "../../../DeepSpeedExamples/BingBertSquad"
# Launcher script invoked via bash to run the fine-tuning job.
script_file_name = "run_squad_deepspeed.sh"
# Pretrained model checkpoint used as the starting point for fine-tuning.
model_file_name = "training_state_checkpoint_162.tar"
# SQuAD v1.1 dev set used for evaluation.
eval_file_name = "dev-v1.1.json"
# Predictions file the training script writes into the output directory.
pred_file_name = "predictions.json"
# Passed through to the launcher as a string argument, hence not an int.
num_gpus = "4"
timeout_sec = 5 * 60 * 60 # 5 hours
# SQuAD evaluation-script version expected by evaluate().
eval_version = "1.1"
def create_config_file(tmpdir, zeroenabled=False):
    """Write a DeepSpeed JSON config into *tmpdir* and return its path.

    The config enables fp16 Adam training with gradient clipping; the
    ``zero_optimization`` entry is set to the *zeroenabled* flag so the same
    helper serves both the base and the ZeRO e2e tests.
    """
    ds_config = {
        "train_batch_size": 24,
        "train_micro_batch_size_per_gpu": 6,
        "steps_per_print": 10,
        "optimizer": {
            "type": "Adam",
            "params": {
                "lr": 3e-5,
                "weight_decay": 0.0,
                "bias_correction": False
            }
        },
        "gradient_clipping": 1.0,
        "fp16": {
            "enabled": True
        },
        "zero_optimization": zeroenabled,
    }
    out_path = os.path.join(tmpdir, 'temp_config.json')
    with open(out_path, 'w') as out_file:
        json.dump(ds_config, out_file)
    return out_path
def test_e2e_squad_deepspeed_base(tmpdir):
    """End-to-end BingBertSquad fine-tuning with base DeepSpeed (no ZeRO).

    Launches the training shell script and evaluates the resulting
    predictions against the recorded reference exact-match/F1 scores.
    """
    config_file = create_config_file(tmpdir)
    # base run results => {"exact_match": 83.9829706717124, "f1": 90.71138132004097}
    expected_exact_match = 83.98
    expected_f1 = 90.71
    model_file = os.path.join(squad_dir, model_file_name)
    eval_file = os.path.join(squad_dir, eval_file_name)
    output_dir = os.path.join(tmpdir, "output")
    pred_file = os.path.join(output_dir, pred_file_name)
    proc = sp.Popen([
        "bash",
        script_file_name,
        num_gpus,
        model_file,
        squad_dir,
        output_dir,
        config_file
    ],
                    cwd=base_dir)
    try:
        proc.communicate(timeout=timeout_sec)
        # Popen.communicate() never raises CalledProcessError (only
        # check_call/check_output/run(check=True) do), so inspect the exit
        # status explicitly instead of relying on a dead except clause.
        if proc.returncode != 0:
            pytest.fail("Error: Run Failed")
        if os.path.exists(pred_file):
            eval_result = eval.evaluate(eval_version, eval_file, pred_file)
            print("evaluation result: ", json.dumps(eval_result))
            assert isclose(eval_result["exact_match"],
                           expected_exact_match,
                           abs_tol=1e-2)
            assert isclose(eval_result["f1"], expected_f1, abs_tol=1e-2)
        else:
            pytest.fail("Error: Run Failed")
    except sp.TimeoutExpired:
        proc.kill()
        pytest.fail("Error: Timeout")
def test_e2e_squad_deepspeed_zero(tmpdir):
    """End-to-end BingBertSquad fine-tuning with DeepSpeed ZeRO enabled.

    Same flow as the base test, but the generated config sets
    ``zero_optimization`` to True; reference scores differ accordingly.
    """
    config_file = create_config_file(tmpdir, True)
    # base run results => {"exact_match": 84.1438032166509, "f1": 90.89776136505441}
    expected_exact_match = 84.14
    expected_f1 = 90.89
    model_file = os.path.join(squad_dir, model_file_name)
    eval_file = os.path.join(squad_dir, eval_file_name)
    output_dir = os.path.join(tmpdir, "output")
    pred_file = os.path.join(output_dir, pred_file_name)
    proc = sp.Popen([
        "bash",
        script_file_name,
        num_gpus,
        model_file,
        squad_dir,
        output_dir,
        config_file
    ],
                    cwd=base_dir)
    try:
        proc.communicate(timeout=timeout_sec)
        # Popen.communicate() never raises CalledProcessError (only
        # check_call/check_output/run(check=True) do), so inspect the exit
        # status explicitly instead of relying on a dead except clause.
        if proc.returncode != 0:
            pytest.fail("Error: Run Failed")
        if os.path.exists(pred_file):
            eval_result = eval.evaluate(eval_version, eval_file, pred_file)
            print("evaluation result: ", json.dumps(eval_result))
            assert isclose(eval_result["exact_match"],
                           expected_exact_match,
                           abs_tol=1e-2)
            assert isclose(eval_result["f1"], expected_f1, abs_tol=1e-2)
        else:
            pytest.fail("Error: Run Failed")
    except sp.TimeoutExpired:
        proc.kill()
        pytest.fail("Error: Timeout")
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册