Unverified · Commit ebef6601, authored by gongweibao, committed by GitHub

Destroy session first. (#30954)

Parent 500f28ec
@@ -50,29 +50,35 @@ class AscendInstance {
   virtual ~AscendInstance() {}
   AscendInstance() {}

-  std::map<AscendString, AscendString> GetDefaultInitOptions() {
+  std::map<AscendString, AscendString> _GetDefaultInitOptions() {
     std::map<AscendString, AscendString> init_options;
     init_options["ge.exec.deviceId"] = "0";
     init_options["ge.graphRunMode"] = "1";
     return init_options;
   }

-  std::map<AscendString, AscendString> GetDefaultInitSessionOptions() {
+  std::map<AscendString, AscendString> _GetDefaultInitSessionOptions() {
     std::map<AscendString, AscendString> init_options;
-    init_options["a"] = "b";
-    init_options["ge.trainFlag"] = "1";
+    //init_options["a"] = "b";
+    //init_options["ge.trainFlag"] = "1";
     return init_options;
   }

-  ge::Status InitGEForUT() { return ge::GEInitialize(GetDefaultInitOptions()); }
+  ge::Status InitGEForUT() { return ge::GEInitialize(_GetDefaultInitOptions()); }

   void InitGlobalResouces() {
-    LOG(INFO) << "Begin InitGlobalResouces";
-    session_.reset(new ge::Session(GetDefaultInitSessionOptions()));
+    LOG(INFO) << "Begin ascend InitGlobalResouces";
+    session_.reset(new ge::Session(_GetDefaultInitSessionOptions()));
     if (session_ == nullptr) {
       LOG(FATAL) << "new session error:" << session_;
     }
-    LOG(INFO) << "End InitGlobalResouces";
+    LOG(INFO) << "End ascend InitGlobalResouces";
   }

+  void DestroyGlobalResouces() {
+    LOG(INFO) << "Begin ascend DestroyGlobalResouces";
+    session_ = nullptr;
+    LOG(INFO) << "Begin ascend DestroyGlobalResouces";
+  }

   static std::shared_ptr<AscendInstance> GetInstance() {
......
@@ -55,6 +55,9 @@ void BindAscendWrapper(py::module *m) {
       .def("init_global_resources",
           &framework::AscendInstance::InitGlobalResouces,
          py::call_guard<py::gil_scoped_release>())
+      .def("destroy_global_resources",
+          &framework::AscendInstance::DestroyGlobalResouces,
+         py::call_guard<py::gil_scoped_release>())
       .def("add_ascend_subgraph", &framework::AscendInstance::AddAscendSubgraph,
          py::call_guard<py::gil_scoped_release>());
 }
......
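As a rough usage sketch of the binding added above (a minimal sketch, assuming an Ascend-enabled PaddlePaddle build in which core.AscendInstance() returns the singleton bound here; that accessor name is an assumption and not shown in this diff), destroy_global_resources pairs with init_global_resources so the ge::Session is dropped before GE itself is finalized:

    import paddle.fluid.core as core

    instance = core.AscendInstance()      # assumed accessor for the bound singleton
    instance.init_global_resources()      # creates the ge::Session
    # ... add and run ascend subgraphs ...
    instance.destroy_global_resources()   # new binding: releases the session
    core.ge_finalize()                    # finalize GE only after the session is gone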
@@ -121,8 +121,7 @@ see: http://www.paddlepaddle.org/documentation/docs/zh/1.6/user_guides/howto/tra
         "--run_mode",
         type=str,
         default="collective",
-        help="run mode of job, can be:collective/ps/ps-heter"
-    )
+        help="run mode of job, can be:collective/ps/ps-heter")

     base_group.add_argument(
         "--ascend_npus",
@@ -133,7 +132,6 @@ see: http://www.paddlepaddle.org/documentation/docs/zh/1.6/user_guides/howto/tra
         "--ascend_npus=\"0,1,2,3\" will launch four training processes each bound to one gpu."
     )
-    base_group.add_argument("--selected_gpus", dest="gpus")
     base_group.add_argument(
@@ -250,6 +248,9 @@ def launch_collective(args):
         log_dir=args.log_dir,
         envs=global_envs)

+    for idx, proc in enumerate(procs):
+        print("launch proc_id:{} idx:{}".format(proc.proc.pid, idx))
+
     while True:
         alive = watch_local_trainers(procs, cluster.trainers_nranks())
......
@@ -182,9 +182,14 @@ class AscendOptimizer(Optimizer):
     def __init__(self, optimizer, fetch_list=[]):
         self.inner_opt = optimizer
         self.fetch_list = fetch_list
+        self.ascend_instance = None

     def __del__(self):
+        print("begin AscendOptimizer del")
+        if self.ascend_instance is not None:
+            self.ascend_instance.destroy_global_resources()
         core.ge_finalize()
+        print("end AscendOptimizer del")

     def _can_apply(self):
         if not self.user_defined_strategy.ascend:
......
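A hedged sketch of the teardown order the revised __del__ enforces: the GE session held by the wrapper is destroyed first, and only then is GE finalized. The explicit helper below is hypothetical (not part of this change) and only illustrates the ordering that otherwise runs implicitly when the optimizer is garbage-collected:

    import paddle.fluid.core as core

    def shutdown_ascend(optimizer):
        # Hypothetical helper mirroring AscendOptimizer.__del__ after this commit:
        # drop the wrapper's ge::Session before calling GEFinalize.
        if optimizer.ascend_instance is not None:
            optimizer.ascend_instance.destroy_global_resources()
        core.ge_finalize()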
@@ -16,6 +16,7 @@ from paddle.fluid.optimizer import Optimizer
 import paddle.fluid.core as core
 import numpy as np
 from paddle.distributed import fleet
+from functools import reduce

 registerd_op = {## forwards
     "elementwise_add": "AddParser",
......