Unverified commit ebef6601, authored by gongweibao, committed by GitHub

Destroy session first. (#30954)

Destroy session first.
Parent 500f28ec
......@@ -50,29 +50,35 @@ class AscendInstance {
   virtual ~AscendInstance() {}
   AscendInstance() {}
 
-  std::map<AscendString, AscendString> GetDefaultInitOptions() {
+  std::map<AscendString, AscendString> _GetDefaultInitOptions() {
     std::map<AscendString, AscendString> init_options;
     init_options["ge.exec.deviceId"] = "0";
     init_options["ge.graphRunMode"] = "1";
     return init_options;
   }
 
-  std::map<AscendString, AscendString> GetDefaultInitSessionOptions() {
+  std::map<AscendString, AscendString> _GetDefaultInitSessionOptions() {
     std::map<AscendString, AscendString> init_options;
-    init_options["a"] = "b";
-    init_options["ge.trainFlag"] = "1";
+    // init_options["a"] = "b";
+    // init_options["ge.trainFlag"] = "1";
     return init_options;
   }
 
-  ge::Status InitGEForUT() { return ge::GEInitialize(GetDefaultInitOptions()); }
+  ge::Status InitGEForUT() { return ge::GEInitialize(_GetDefaultInitOptions()); }
 
   void InitGlobalResouces() {
-    LOG(INFO) << "Begin InitGlobalResouces";
-    session_.reset(new ge::Session(GetDefaultInitSessionOptions()));
+    LOG(INFO) << "Begin ascend InitGlobalResouces";
+    session_.reset(new ge::Session(_GetDefaultInitSessionOptions()));
     if (session_ == nullptr) {
       LOG(FATAL) << "new session error:" << session_;
     }
-    LOG(INFO) << "End InitGlobalResouces";
+    LOG(INFO) << "End ascend InitGlobalResouces";
   }
 
+  void DestroyGlobalResouces() {
+    LOG(INFO) << "Begin ascend DestroyGlobalResouces";
+    session_ = nullptr;
+    LOG(INFO) << "End ascend DestroyGlobalResouces";
+  }
+
   static std::shared_ptr<AscendInstance> GetInstance() {
......
......@@ -55,6 +55,9 @@ void BindAscendWrapper(py::module *m) {
       .def("init_global_resources",
           &framework::AscendInstance::InitGlobalResouces,
           py::call_guard<py::gil_scoped_release>())
+      .def("destroy_global_resources",
+          &framework::AscendInstance::DestroyGlobalResouces,
+          py::call_guard<py::gil_scoped_release>())
       .def("add_ascend_subgraph", &framework::AscendInstance::AddAscendSubgraph,
           py::call_guard<py::gil_scoped_release>());
 }
......
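For orientation, here is a minimal Python-side sketch of the lifecycle these bindings expose. Only the bound method names (`init_global_resources`, `destroy_global_resources`) and `core.ge_finalize()` are confirmed by this diff; constructing the handle via `core.AscendInstance()` is an assumption about the surrounding pybind module.

# A hedged sketch, not part of the commit. Assumption: the pybind class
# is reachable as core.AscendInstance(); only the method names below
# come from this diff.
import paddle.fluid.core as core

instance = core.AscendInstance()
instance.init_global_resources()   # creates the ge::Session
try:
    pass  # build and run Ascend subgraphs here
finally:
    # "Destroy session first": drop the ge::Session before finalizing
    # GE itself, which is the ordering the commit title asks for.
    instance.destroy_global_resources()
    core.ge_finalize()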
......@@ -121,8 +121,7 @@ see: http://www.paddlepaddle.org/documentation/docs/zh/1.6/user_guides/howto/tra
         "--run_mode",
         type=str,
         default="collective",
-        help="run mode of job, can be:collective/ps/ps-heter"
-    )
+        help="run mode of job, can be:collective/ps/ps-heter")
 
     base_group.add_argument(
         "--ascend_npus",
......@@ -133,7 +132,6 @@ see: http://www.paddlepaddle.org/documentation/docs/zh/1.6/user_guides/howto/tra
         "--ascend_npus=\"0,1,2,3\" will launch four training processes each bound to one gpu."
     )
-
     base_group.add_argument("--selected_gpus", dest="gpus")
 
     base_group.add_argument(
......@@ -250,6 +248,9 @@ def launch_collective(args):
         log_dir=args.log_dir,
         envs=global_envs)
 
+    for idx, proc in enumerate(procs):
+        print("launch proc_id:{} idx:{}".format(proc.proc.pid, idx))
+
     while True:
         alive = watch_local_trainers(procs, cluster.trainers_nranks())
......
......@@ -182,9 +182,14 @@ class AscendOptimizer(Optimizer):
     def __init__(self, optimizer, fetch_list=[]):
         self.inner_opt = optimizer
         self.fetch_list = fetch_list
+        self.ascend_instance = None
 
+    def __del__(self):
+        print("begin AscendOptimizer del")
+        if self.ascend_instance is not None:
+            self.ascend_instance.destroy_global_resources()
+            core.ge_finalize()
+        print("end AscendOptimizer del")
+
     def _can_apply(self):
         if not self.user_defined_strategy.ascend:
......
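Since `__del__` only runs when the interpreter drops the last reference, teardown timing is nondeterministic; below is a hedged sketch of performing the same cleanup explicitly. The attribute and method names mirror the diff, while the optimizer construction is simplified.

# A hedged sketch, not part of the commit: deterministic teardown.
# Clearing ascend_instance afterwards keeps __del__ from destroying
# the session and finalizing GE a second time.
opt = AscendOptimizer(inner_optimizer, fetch_list=[])
try:
    pass  # opt.minimize(...) and the training loop would go here
finally:
    if opt.ascend_instance is not None:
        opt.ascend_instance.destroy_global_resources()
        core.ge_finalize()
        opt.ascend_instance = None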
......@@ -16,6 +16,7 @@ from paddle.fluid.optimizer import Optimizer
 import paddle.fluid.core as core
 import numpy as np
 from paddle.distributed import fleet
+from functools import reduce
 
 registerd_op = {  ## forwards
     "elementwise_add": "AddParser",
......
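The new `functools.reduce` import lands just above the parser table; its call site is outside this hunk, so the usual pattern shown here, collapsing a shape into an element count, is an assumption about how the parsers use it.

# A hedged sketch of the typical reduce use in shape handling; the
# actual call site is not shown in this hunk.
from functools import reduce

shape = [2, 3, 4]
numel = reduce(lambda a, b: a * b, shape, 1)  # 2 * 3 * 4 = 24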