Commit 05e8d95e, authored by: limingqi107

Optimize the GPU context switch

Parent commit: 1fde9654
......@@ -85,7 +85,7 @@ class PoolingGradGpuFwdKernel : public GpuKernel {
padded_descriptor_, padded, &beta, padded_descriptor_, padded_dx),
"cudnnPoolingBackward failed");
CalPadGrad(padded_size_ / sizeof(T), padded_dx, n_, c_, old_height_, old_width_, old_height_ + pad_height_,
CalPadGrad(output_size_ / sizeof(T), padded_dx, n_, c_, old_height_, old_width_, old_height_ + pad_height_,
old_width_ + pad_width_, pad_top_, pad_left_, dx, reinterpret_cast<cudaStream_t>(stream_ptr));
} else {
CHECK_CUDNN_RET_WITH_EXCEPT(
......
......@@ -139,16 +139,10 @@ PYBIND11_MODULE(_c_expression, m) {
.def("set_save_ms_model_flag", &mindspore::MsContext::set_save_ms_model_flag, "Set whether to save ms model.")
.def("get_save_ms_model_path", &mindspore::MsContext::save_ms_model_path, "Get path to save ms model.")
.def("set_save_ms_model_path", &mindspore::MsContext::set_save_ms_model_path, "Set path to save ms model")
.def("get_enable_gpu_summary", &mindspore::MsContext::enable_gpu_summary, "Get whether to enable gpu summary.")
.def("set_enable_gpu_summary", &mindspore::MsContext::set_enable_gpu_summary, "Set whether to enable gpu summary.")
.def("get_enable_dump", &mindspore::MsContext::enable_dump, "Get whether to enable dump.")
.def("set_enable_dump", &mindspore::MsContext::set_enable_dump, "Set whether to enable dump.")
.def("get_save_dump_path", &mindspore::MsContext::save_dump_path, "Get path to dump.")
.def("set_save_dump_path", &mindspore::MsContext::set_save_dump_path, "Set path to dump.")
.def("get_enable_dynamic_mem_pool", &mindspore::MsContext::enable_dynamic_mem_pool,
"Get whether to enable dynamic mem pool.")
.def("set_enable_dynamic_mem_pool", &mindspore::MsContext::set_enable_dynamic_mem_pool,
"Set whether to enable dynamic mem pool.")
.def("set_graph_memory_max_size", &mindspore::MsContext::set_graph_memory_max_size, "set graph memory max size.")
.def("set_variable_memory_max_size", &mindspore::MsContext::set_variable_memory_max_size,
"set variable memory max size");
......
......@@ -265,14 +265,6 @@ class _Context:
def save_ms_model_path(self, save_ms_model_path):
self._context_handle.set_save_ms_model_path(save_ms_model_path)
@property
def enable_gpu_summary(self):
return self._context_handle.get_enable_gpu_summary()
@enable_gpu_summary.setter
def enable_gpu_summary(self, enable_gpu_summary):
self._context_handle.set_enable_gpu_summary(enable_gpu_summary)
@property
def enable_auto_mixed_precision(self):
return self._context_handle.get_auto_mixed_precision_flag()
......@@ -315,14 +307,6 @@ class _Context:
"""Sets whether to save the network class name in the scope."""
self._thread_local_info.reserve_class_name_in_scope = reserve_class_name_in_scope
@property
def enable_dynamic_memory(self):
return self._context_handle.get_enable_dynamic_mem_pool()
@enable_dynamic_memory.setter
def enable_dynamic_memory(self, enable_dynamic_memory):
self._context_handle.set_enable_dynamic_mem_pool(enable_dynamic_memory)
@property
def graph_memory_max_size(self):
return None
......@@ -485,9 +469,9 @@ def reset_auto_parallel_context():
@args_type_check(mode=int, precompile_only=bool, device_target=str,
device_id=int, enable_ir_fusion=bool, save_graphs=bool,
enable_task_sink=bool, save_graphs_path=str, enable_loop_sink=bool,
enable_mem_reuse=bool, save_ms_model=bool, save_ms_model_path=str, enable_gpu_summary=bool,
enable_mem_reuse=bool, save_ms_model=bool, save_ms_model_path=str,
enable_auto_mixed_precision=bool, enable_dump=bool, save_dump_path=str,
enable_reduce_precision=bool, enable_dynamic_memory=bool, graph_memory_max_size=str,
enable_reduce_precision=bool, graph_memory_max_size=str,
variable_memory_max_size=str)
def set_context(**kwargs):
"""
......@@ -521,7 +505,6 @@ def set_context(**kwargs):
enable_mem_reuse (bool): Whether to enable memory reuse. Default: True.
save_ms_model (bool): Whether to save lite model converted by graph. Default: False.
save_ms_model_path (str): Path to save converted lite model. Default: "."
enable_gpu_summary (bool): Whether to enable gpu summary. Default: True.
save_graphs_path (str): Path to save graphs. Default: "."
enable_auto_mixed_precision (bool): Whether to enable auto mixed precision. Default: True.
reserve_class_name_in_scope (bool) : Whether to save the network class name in the scope. Default: True.
......@@ -530,7 +513,6 @@ def set_context(**kwargs):
save_dump_path (str): When the program is executed on Ascend, operators can dump data here.
The root dump path is configured in /home/HwHiAiUser/ide_daemon/ide_daemon.cfg.
So the real dump path is "{configured root dump path}/{`save_dump_path`}". Default: ".".
enable_dynamic_memory (bool): Whether to enable dynamic memory. Default: False.
graph_memory_max_size (str): Sets graph memory max size. Default: "26GB".
variable_memory_max_size (str): Sets variable memory max size. Default: "5GB".
......@@ -547,10 +529,8 @@ def set_context(**kwargs):
>>> context.set_context(enable_mem_reuse=True)
>>> context.set_context(enable_reduce_precision=True)
>>> context.set_context(save_ms_model=True, save_ms_model_path=".")
>>> context.set_context(enable_gpu_summary=False)
>>> context.set_context(enable_dump=True, save_dump_path=".")
>>> context.set_context(reserve_class_name_in_scope=True)
>>> context.set_context(enable_dynamic_memory=True)
>>> context.set_context(graph_memory_max_size="25GB")
>>> context.set_context(variable_memory_max_size="6GB")
>>> context.set_context(mode=context.GRAPH_MODE,
......
......@@ -34,6 +34,8 @@ from mindspore.nn import TrainOneStepCell, WithLossCell
from mindspore.nn import Dense
from mindspore import amp
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
def random_normal_init(shape, mean=0.0, stddev=0.01, seed=None):
init_value = np.ones(shape).astype(np.float32) * 0.01
......@@ -324,7 +326,6 @@ def resnet50(num_classes):
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_trainTensor(num_classes=10, epoch=8, batch_size=1):
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
net = resnet50(num_classes)
lr = 0.1
momentum = 0.9
......@@ -345,8 +346,6 @@ def test_trainTensor(num_classes=10, epoch=8, batch_size=1):
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_trainTensor_amp(num_classes=10, epoch=18, batch_size=16):
context.set_context(mode=context.GRAPH_MODE, device_target="GPU", enable_mem_reuse=False,
enable_dynamic_memory=False)
net = resnet50(num_classes)
lr = 0.1
momentum = 0.9
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册