Commit 0e82b959 authored by Megvii Engine Team

feat(mge/imperative): add sublinear options

GitOrigin-RevId: f0e917f716c0bcb700559bb3a11cc5aec97dc117
Parent e027dcbf
from .sublinear_memory_config import SublinearMemoryConfig
from .tracing import exclude_from_trace, trace
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from ..device import get_device_count
class SublinearMemoryConfig:
r"""
Configuration for sublinear memory optimization.
:param thresh_nr_try: number of samples both for searching in linear space
and around the current threshold in sublinear memory optimization. Default: 10.
It can also be set through the environment variable 'MGB_SUBLINEAR_MEMORY_THRESH_NR_TRY'.
:param genetic_nr_iter: number of iterations used to find the best checkpoints in the genetic algorithm.
Default: 0.
It can also be set through the environment variable 'MGB_SUBLINEAR_MEMORY_GENETIC_NR_ITER'.
:param genetic_pool_size: number of samples for the crossover random selection
during genetic optimization. Default: 20.
It can also be set through the environment variable 'MGB_SUBLINEAR_MEMORY_GENETIC_POOL_SIZE'.
:param lb_memory: lower bound of the bottleneck memory size, in MB, for sublinear memory optimization.
It can be used to trade off memory against speed manually. Default: 0.
It can also be set through the environment variable 'MGB_SUBLINEAR_MEMORY_LOWER_BOUND_MB'.
:param num_worker: number of worker threads used to search for the optimal checkpoints
in sublinear memory optimization. Default: half of the number of CPUs in the system.
Note: the value must be greater than or equal to one.
It can also be set through the environment variable 'MGB_SUBLINEAR_MEMORY_WORKERS'.
Note that the environment variable MGB_COMP_GRAPH_OPT must be set to 'enable_sublinear_memory_opt=1'
for the above environment variables to take effect.
"""
def __init__(
self,
thresh_nr_try: int = 10,
genetic_nr_iter: int = 0,
genetic_pool_size: int = 20,
lb_memory: int = 0,
num_worker: int = max(1, get_device_count("cpu") // 2),
):
assert thresh_nr_try >= 0, "thresh_nr_try must be greater than or equal to zero"
self.thresh_nr_try = thresh_nr_try
assert genetic_nr_iter >= 0, "genetic_nr_iter must be greater than or equal to zero"
self.genetic_nr_iter = genetic_nr_iter
assert (
genetic_pool_size >= 0
), "genetic_pool_size must be greater than or equal to zero"
self.genetic_pool_size = genetic_pool_size
self.lb_memory = lb_memory
assert num_worker > 0, "num_worker must be greater than or equal to one"
self.num_worker = num_worker
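
A minimal usage sketch of the new option, mirroring the updated correctness test further down; the toy step function, its relu body, and the random input are illustrative placeholders and not part of this commit:

import numpy as np

import megengine.functional as F
from megengine.jit import SublinearMemoryConfig, trace
from megengine.tensor import Tensor

# Run the genetic checkpoint search for 10 iterations, as in the test below.
config = SublinearMemoryConfig(genetic_nr_iter=10)


def step(x):
    # Placeholder computation; any symbolically traced graph is optimized
    # the same way once the config is attached.
    return F.relu(x) * 2


# Matches the new trace.__init__ signature; the config is forwarded to the
# compiled graph via _apply_graph_options().
traced_step = trace(step, symbolic=True, sublinear_memory_config=config)
out = traced_step(Tensor(np.random.randn(8, 8).astype("float32")))
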
@@ -7,6 +7,7 @@ from ..core.ops.special import Const
from ..core.tensor import megbrain_graph as G
from ..core.tensor.core import OpBase, apply
from ..core.tensor.raw_tensor import OpDef, RawTensor, as_raw_tensor
from .sublinear_memory_config import SublinearMemoryConfig
class TraceMismatchError(RuntimeError):
@@ -72,11 +73,18 @@ class trace:
self.__init__(*args, **kwargs)
return self
def __init__(self, function, symbolic=False, capture_as_const=False):
def __init__(
self,
function,
symbolic=False,
capture_as_const=False,
sublinear_memory_config: SublinearMemoryConfig = None,
):
self.__wrapped__ = function
self._symbolic = symbolic
self._capture_as_const = capture_as_const
self._capture_static_shape = False
self._sublinear_memory_config = sublinear_memory_config
self._untraced = True
self._tinfo = [] # handle -> TensorInfo
@@ -227,6 +235,7 @@ class trace:
G.OutputNode(x._LazyEvalTensor__varnode).outputs[0]
for x in lazy_eval_tensors
]
self._apply_graph_options(self._lazy_eval_graph)
self._lazy_eval_graph.compile(*readers)
self._lazy_eval_graph()
for r, x in zip(readers, lazy_eval_tensors):
@@ -259,9 +268,26 @@
info.exported = True
info.data_read = True
def _apply_graph_options(self, graph):
# sublinear
if self._sublinear_memory_config is not None:
graph.options.enable_sublinear_memory_opt = True
sublinear_config = graph.options.sublinear_mem_config
sublinear_config.lb_memory = self._sublinear_memory_config.lb_memory
sublinear_config.genetic_nr_iter = (
self._sublinear_memory_config.genetic_nr_iter
)
sublinear_config.genetic_pool_size = (
self._sublinear_memory_config.genetic_pool_size
)
sublinear_config.thresh_nr_try = self._sublinear_memory_config.thresh_nr_try
sublinear_config.num_worker = self._sublinear_memory_config.num_worker
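
As the SublinearMemoryConfig docstring notes, the same knobs can also be driven by environment variables instead of this Python config; a hedged sketch, assuming the variables are read by the underlying graph runtime and therefore must be set before the traced function is compiled:

import os

# Sublinear memory optimization itself is switched on through
# MGB_COMP_GRAPH_OPT (see the class docstring above).
os.environ["MGB_COMP_GRAPH_OPT"] = "enable_sublinear_memory_opt=1"

# Per-field overrides, mirroring the SublinearMemoryConfig constructor arguments.
os.environ["MGB_SUBLINEAR_MEMORY_GENETIC_NR_ITER"] = "10"
os.environ["MGB_SUBLINEAR_MEMORY_LOWER_BOUND_MB"] = "0"
os.environ["MGB_SUBLINEAR_MEMORY_WORKERS"] = "4"
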
def _compile(self):
graph = self._graph = G.Graph()
graph.options.no_force_inplace = True
self._apply_graph_options(graph)
# graph.options.graph_opt_level = 0
need_reset_nodes = self._need_reset_nodes = []
# links enforce ordering of I/O nodes
@@ -119,6 +119,7 @@ void init_graph_rt(py::module m) {
DEF_READWRITE(enable_memory_swap)
DEF_READWRITE(comp_node_seq_record_level)
DEF_READWRITE(no_force_inplace)
DEF_READWRITE(sublinear_mem_config)
// DEF_READWRITE(eager_evaluation)
// DEF_READWRITE(imperative_proxy_graph)
// DEF_READWRITE(extra_vardeps)
@@ -142,6 +143,16 @@
#undef CURRENT_CLASS
#define CURRENT_CLASS cg::ComputingGraph::Options::SublinearMemConfig
py::class_<cg::ComputingGraph::Options::SublinearMemConfig>(PyComputingGraphOptions, "SublinearMemConfig")
DEF_READWRITE(thresh_nr_try)
DEF_READWRITE(genetic_nr_iter)
DEF_READWRITE(genetic_pool_size)
DEF_READWRITE(lb_memory)
DEF_READWRITE(num_worker);
#undef CURRENT_CLASS
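
The binding above exposes the SublinearMemConfig fields to Python, which is what _apply_graph_options relies on; a small sketch (assuming megbrain_graph is imported as G, as in tracing.py) of reading and tweaking them directly on a graph:

from megengine.core.tensor import megbrain_graph as G

graph = G.Graph()
cfg = graph.options.sublinear_mem_config
# Defaults come from the C++ struct further down: genetic_nr_iter=0,
# genetic_pool_size=20, lb_memory=0, num_worker=cpu_count/2.
print(cfg.thresh_nr_try, cfg.genetic_nr_iter, cfg.genetic_pool_size)
cfg.genetic_nr_iter = 10  # fields are declared DEF_READWRITE, so writes work too
graph.options.enable_sublinear_memory_opt = True
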
auto common = rel_import("common", m, 1);
common.def("invoke_op", [](const OpDef& def, const std::vector<cg::VarNode*> inputs, cg::ComputingGraph* graph) {
@@ -19,6 +19,7 @@ import megengine.functional as F
from megengine import jit
from megengine.core._trace_option import set_tensor_shape
from megengine.functional.debug_param import set_conv_execution_strategy
from megengine.jit import SublinearMemoryConfig
from megengine.module import AvgPool2d, BatchNorm2d, Conv2d, Linear, Module
from megengine.optimizer import SGD
from megengine.tensor import Tensor
@@ -217,14 +218,14 @@ def test_correctness():
set_conv_execution_strategy("HEURISTIC_REPRODUCIBLE")
run_train(model_path, False, False, max_err=1e-5)
# run_test(model_path, True, False)
# run_test(model_path, True, True)
run_train(model_path, True, False, max_err=1e-5)
run_train(model_path, True, True, max_err=1e-5)
# sublinear
# config = SublinearMemoryConfig(genetic_nr_iter=10)
# run_test(
# model_path, True, True, sublinear_memory_config=config, max_err=1e-5,
# )
config = SublinearMemoryConfig(genetic_nr_iter=10)
run_train(
model_path, True, True, sublinear_memory_config=config, max_err=1e-5,
)
run_eval(model_path, False, max_err=1e-7)
# run_eval(model_path, True, max_err=1e-7) # XXX: fix me
run_eval(model_path, True, max_err=1e-7)
@@ -298,23 +298,23 @@ class trace:
if self._sublinear_memory_config is not None:
cg.set_option("enable_sublinear_memory_opt", True)
cg.set_option(
"sublinear_mem_cofig.lb_memory",
"sublinear_mem_config.lb_memory",
self._sublinear_memory_config.lb_memory,
)
cg.set_option(
"sublinear_mem_cofig.genetic_nr_iter",
"sublinear_mem_config.genetic_nr_iter",
self._sublinear_memory_config.genetic_nr_iter,
)
cg.set_option(
"sublinear_mem_cofig.genetic_pool_size",
"sublinear_mem_config.genetic_pool_size",
self._sublinear_memory_config.genetic_pool_size,
)
cg.set_option(
"sublinear_mem_cofig.thresh_nr_try",
"sublinear_mem_config.thresh_nr_try",
self._sublinear_memory_config.thresh_nr_try,
)
cg.set_option(
"sublinear_mem_cofig.num_worker",
"sublinear_mem_config.num_worker",
self._sublinear_memory_config.num_worker,
)
# pack allreduce
@@ -116,11 +116,11 @@ bool _config::set_comp_graph_option(
SET_CG_OPTION(allocate_static_mem_after_graph_compile);
SET_CG_OPTION(log_level);
SET_CG_OPTION(enable_sublinear_memory_opt);
SET_CG_OPTION(sublinear_mem_cofig.lb_memory);
SET_CG_OPTION(sublinear_mem_cofig.genetic_nr_iter);
SET_CG_OPTION(sublinear_mem_cofig.genetic_pool_size);
SET_CG_OPTION(sublinear_mem_cofig.thresh_nr_try);
SET_CG_OPTION(sublinear_mem_cofig.num_worker);
SET_CG_OPTION(sublinear_mem_config.lb_memory);
SET_CG_OPTION(sublinear_mem_config.genetic_nr_iter);
SET_CG_OPTION(sublinear_mem_config.genetic_pool_size);
SET_CG_OPTION(sublinear_mem_config.thresh_nr_try);
SET_CG_OPTION(sublinear_mem_config.num_worker);
SET_CG_OPTION(enable_var_mem_defragment);
SET_CG_OPTION(eager_evaluation);
SET_CG_OPTION(enable_memory_swap);
@@ -219,7 +219,7 @@ ComputingGraphImpl::Components::Components(ComputingGraphImpl* owner)
grad_manager{owner},
#if MGB_ENABLE_SUBLINEAR
seq_modifier_for_sublinear_memory{owner,
&(owner->options().sublinear_mem_cofig)},
&(owner->options().sublinear_mem_config)},
#endif
#if MGB_ENABLE_MEMORY_SWAP
memory_swap_support{owner},
@@ -409,7 +409,7 @@ class ComputingGraph : public std::enable_shared_from_this<ComputingGraph>,
int genetic_pool_size = 20;
int lb_memory = 0;
int num_worker = sys::get_cpu_count() / 2;
} sublinear_mem_cofig;
} sublinear_mem_config;
//! do not re-profile to select best impl algo when input shape
//! changes (use previous algo)
@@ -522,7 +522,7 @@ TEST(TestSublinearMemory, BadOpr) {
set_priority(z, 3);
graph->options().graph_opt_level = 0;
graph->options().enable_sublinear_memory_opt = 1;
graph->options().sublinear_mem_cofig.genetic_nr_iter = 50;
graph->options().sublinear_mem_config.genetic_nr_iter = 50;
auto func = graph->compile({{y, {}}, {z, {}}});
auto&& results = static_cast<cg::ComputingGraphImpl*>(graph.get())
->seq_modifier_for_sublinear_memory().prev_min_bottleneck();