diff --git a/CMakeLists.txt b/CMakeLists.txt index 1c4ef52e64df2106fdcc137b5d9e7d2ad62e0b8e..833bd064bd7ebde0a33e4446dfdbe51364756cff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -697,8 +697,10 @@ endif() if(MGE_WITH_PYTHON_MODULE) if(MGE_BUILD_IMPERATIVE_RT) add_subdirectory(imperative) + message("-- Enable imperative python wrapper runtime") else() add_subdirectory(python_module) + message("-- Enable legacy python wrapper runtime") endif() endif() diff --git a/dnn/src/common/utils.h b/dnn/src/common/utils.h index 449c9b04eff77e2d2f286a42e395a5f6df9e633c..66807c964afbc814ed5c6f72f94d71699004bb19 100644 --- a/dnn/src/common/utils.h +++ b/dnn/src/common/utils.h @@ -342,7 +342,11 @@ template struct SafeMultiplies; template +#if __cplusplus >= 201703L +struct _SafeMultipliesImplUnsigned { +#else struct _SafeMultipliesImplUnsigned : public std::binary_function { +#endif static MEGDNN_CONSTEXPR size_t nbits = sizeof(T) * 8; static size_t clz(unsigned x) { diff --git a/dnn/test/CMakeLists.txt b/dnn/test/CMakeLists.txt index b37be5d9a1aa3df0919be1795693424a5681eca3..5d24a6131891a04cba20b56db7ce2be4ce3b1e37 100644 --- a/dnn/test/CMakeLists.txt +++ b/dnn/test/CMakeLists.txt @@ -70,8 +70,10 @@ if (MEG_WITH_ROCM) target_link_libraries (megdnn_test ${MGE_ROCM_LIBS}) endif () -if(APPLE OR ANDROID) - target_link_libraries(megdnn_test dl) -else() - target_link_libraries(megdnn_test dl rt) +if(UNIX) + if(APPLE OR ANDROID) + target_link_libraries(megdnn_test dl) + else() + target_link_libraries(megdnn_test dl rt) + endif() endif() diff --git a/dnn/test/common/mesh_indexing.h b/dnn/test/common/mesh_indexing.h index 27612212cd01c190eb4cb1e15a9be1f2f7996aca..c3db52233fdbe2835481af5dd8fbe195a2501e1b 100644 --- a/dnn/test/common/mesh_indexing.h +++ b/dnn/test/common/mesh_indexing.h @@ -89,7 +89,7 @@ public: auto ptr = tensor.ptr(); for (size_t n = 0; n < size; ++n) { std::set used; - std::random_shuffle(seq.begin(), seq.end()); + COMPAT_RANDOM(seq.begin(), seq.end()); for (size_t 
step = 0; step < stride; ++step) { megdnn_assert(used.size() < m_size); ptr[n * stride + step] = seq[step]; diff --git a/dnn/test/common/rng.cpp b/dnn/test/common/rng.cpp index 4f7200bb60b9d0b88b5ebd79c2c8454eb43ded10..10c7f213a169f411e230016c7acc9c8293ecfc48 100644 --- a/dnn/test/common/rng.cpp +++ b/dnn/test/common/rng.cpp @@ -75,7 +75,7 @@ Float16PeriodicalRNG::Float16PeriodicalRNG() : m_offset(0) { i2f.i = static_cast(x); m_sequence.push_back(i2f.f); } - std::random_shuffle(m_sequence.begin(), m_sequence.end()); + COMPAT_RANDOM(m_sequence.begin(), m_sequence.end()); } Float16PeriodicalRNG::Float16PeriodicalRNG(size_t range) : m_offset(0) { @@ -99,7 +99,7 @@ Float16PeriodicalRNG::Float16PeriodicalRNG(size_t range) : m_offset(0) { m_sequence.push_back(i2f.f); } - std::random_shuffle(m_sequence.begin(), m_sequence.end()); + COMPAT_RANDOM(m_sequence.begin(), m_sequence.end()); } void Float16PeriodicalRNG::gen(const TensorND& tensor) { diff --git a/dnn/test/common/rng.h b/dnn/test/common/rng.h index 7af67117573ae87f6e3d304ccfa19f6ff17690c2..f20c0e5f4604bd4ac0f4ec2d4b9009c6061e962d 100644 --- a/dnn/test/common/rng.h +++ b/dnn/test/common/rng.h @@ -19,6 +19,16 @@ namespace megdnn { namespace test { +#if __cplusplus >= 201703L +#define COMPAT_RANDOM(begin, end) \ + do { \ + static std::default_random_engine rng_engine; /* static: engine state persists, so repeated calls yield different permutations */ \ + std::shuffle(begin, end, rng_engine); \ + } while (0) +#else +#define COMPAT_RANDOM(begin, end) std::random_shuffle(begin, end) +#endif + class RNG { protected: class RNGxorshf; diff --git a/dnn/test/cuda/argmxx.cpp b/dnn/test/cuda/argmxx.cpp index e90333ad98830bc99b4464b0dbbd23ba9e75babc..89e6cadb81a5621136ec9f8fc511313b7918b552 100644 --- a/dnn/test/cuda/argmxx.cpp +++ b/dnn/test/cuda/argmxx.cpp @@ -24,15 +24,16 @@ class ArgmxxRNG final: public RNG { void gen(const TensorND &tensor) override { auto offset = tensor.layout.span().low_elem; auto nr_elems = tensor.layout.span().dist_elem(); -#define cb(DType) \ - if (tensor.layout.dtype == DType()) { \ - using ctype = 
typename DTypeTrait::ctype; \ - auto ptr = tensor.ptr(); \ - for (size_t i = 0; i < nr_elems; ++i) { \ - ptr[offset+i] = i; \ - } \ - std::random_shuffle(ptr + offset, ptr + offset + nr_elems); \ - } + +#define cb(DType) \ + if (tensor.layout.dtype == DType()) { \ + using ctype = typename DTypeTrait::ctype; \ + auto ptr = tensor.ptr(); \ + for (size_t i = 0; i < nr_elems; ++i) { \ + ptr[offset + i] = i; \ + } \ + COMPAT_RANDOM(ptr + offset, ptr + offset + nr_elems); \ + } MEGDNN_FOREACH_COMPUTING_DTYPE(cb); #undef cb } diff --git a/dnn/test/cuda/argsort.cpp b/dnn/test/cuda/argsort.cpp index 7c1f57524e94e52c06c2ad17a5726d58822b505a..b8779f72f4a125f1568320d9bc3377a7c266b654 100644 --- a/dnn/test/cuda/argsort.cpp +++ b/dnn/test/cuda/argsort.cpp @@ -32,7 +32,7 @@ class ArgsortRNG final : public RNG { } else { for (int i = 0; i < n; ++i) ptr[i] = static_cast(i - n / 2); - std::random_shuffle(ptr, ptr + n); + COMPAT_RANDOM(ptr, ptr + n); } } @@ -86,7 +86,7 @@ void run_backward_test(Handle* handle, DType dtype) { for (size_t j = 0; j < n; ++j) { ptr[j] = j; } - std::random_shuffle(ptr, ptr + n); + COMPAT_RANDOM(ptr, ptr + n); ptr += n; } } diff --git a/dnn/test/cuda/relayout.cpp b/dnn/test/cuda/relayout.cpp index 24d1aebfc534ba3f2d3cc25b54de54572ecef00a..a5fdc4f906b1f9e4cd64bf80e54d1c56b2abf71b 100644 --- a/dnn/test/cuda/relayout.cpp +++ b/dnn/test/cuda/relayout.cpp @@ -361,9 +361,8 @@ TEST_F(CUDA, BENCHMARK_RELAYOUT_7) { for (size_t r = 0; r < _dim.size(); r++) permutation[r] = r; for (int nsample = 0; nsample < 50; nsample++) { - std::random_shuffle(_dim.begin(), _dim.end()); - - std::random_shuffle(permutation.begin(), permutation.end()); + COMPAT_RANDOM(_dim.begin(), _dim.end()); + COMPAT_RANDOM(permutation.begin(), permutation.end()); if (!isTrivial(permutation)) { run({{_dim[0], _dim[1], _dim[2], _dim[3], _dim[4], _dim[5], _dim[6]}, @@ -451,9 +450,10 @@ TEST_F(CUDA, BENCHMARK_RELAYOUT_5) { printf("vol %d cur_ratio %lf | %lf\n", vol, cur_ratio, vol_re); // 
printVec(dim); - std::random_shuffle(dim.begin(), dim.end()); + COMPAT_RANDOM(dim.begin(), dim.end()); + while (isTrivial(permutation)) { - std::random_shuffle(permutation.begin(), permutation.end()); + COMPAT_RANDOM(permutation.begin(), permutation.end()); } run({{dim[0], dim[1], dim[2], dim[3], dim[4]}, dtype::Int32()}, @@ -603,8 +603,9 @@ TEST_F(CUDA, BENCHMARK_LAST_CONTIG_ALIGN_TEST) { for (size_t r = 0; r < _dim.size(); r++) permutation[r] = r; for (int nsample = 0; nsample < 20; nsample++) { - std::random_shuffle(_dim.begin(), _dim.end() - 1); - std::random_shuffle(permutation.begin(), permutation.end() - 1); + COMPAT_RANDOM(_dim.begin(), _dim.end() - 1); + + COMPAT_RANDOM(permutation.begin(), permutation.end() - 1); if (nsample < 5) _dim[5] = (u.gen_single_val() / 4 + 1) * 4; diff --git a/dnn/test/cuda/sleep.cpp b/dnn/test/cuda/sleep.cpp index 5395b04276fb57c315f565873d68a9041a3f96ad..52d2a05ef8f1199c9d484dbe0888f1162260e128 100644 --- a/dnn/test/cuda/sleep.cpp +++ b/dnn/test/cuda/sleep.cpp @@ -24,7 +24,7 @@ using namespace test; TEST_F(CUDA, SLEEP) { - auto opr = this->handle_cuda()->create_operator(); + auto opr = this->handle_cuda()->create_operator(); auto run = [&](float time) -> double { opr->param() = {time}; diff --git a/dnn/test/rocm/argmxx.cpp b/dnn/test/rocm/argmxx.cpp index f94b259e854ff3935201a111cd1d755425b1dcde..1fada4e58c7b3ef4e9b53fe6529fb81321a4e9dc 100644 --- a/dnn/test/rocm/argmxx.cpp +++ b/dnn/test/rocm/argmxx.cpp @@ -24,16 +24,17 @@ class ArgmxxRNG final: public RNG { void gen(const TensorND &tensor) override { auto offset = tensor.layout.span().low_elem; auto nr_elems = tensor.layout.span().dist_elem(); -#define cb(DType) \ - if (tensor.layout.dtype == DType()) { \ - using ctype = typename DTypeTrait::ctype; \ - auto ptr = tensor.ptr(); \ - for (size_t i = 0; i < nr_elems; ++i) { \ - ptr[offset+i] = i; \ - } \ - std::random_shuffle(ptr + offset, ptr + offset + nr_elems); \ - return; \ - } + +#define cb(DType) \ + if 
(tensor.layout.dtype == DType()) { \ + using ctype = typename DTypeTrait::ctype; \ + auto ptr = tensor.ptr(); \ + for (size_t i = 0; i < nr_elems; ++i) { \ + ptr[offset + i] = i; \ + } \ + COMPAT_RANDOM(ptr + offset, ptr + offset + nr_elems); \ + return; \ + } MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb); #undef cb megdnn_throw(megdnn_mangle(ssprintf("Unsupported DType: %s", diff --git a/imperative/CMakeLists.txt b/imperative/CMakeLists.txt index 55a97a20f5ea2b2b92e33bb92ca87a31d76230a5..3bbdeffd0befd82baa263a72ec689eb90cfd8f53 100644 --- a/imperative/CMakeLists.txt +++ b/imperative/CMakeLists.txt @@ -76,7 +76,11 @@ add_custom_target(_version_ld SOURCES ${VERSION_SCRIPT}) add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/pybind11 ${PROJECT_BINARY_DIR}/third_party/pybind11) pybind11_add_module(${MODULE_NAME} NO_EXTRAS ${SRCS}) -target_link_libraries(${MODULE_NAME} PRIVATE gen_op_def megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT}) +if (APPLE OR MSVC OR WIN32) + target_link_libraries(${MODULE_NAME} PRIVATE gen_op_def megbrain megdnn) +else() + target_link_libraries(${MODULE_NAME} PRIVATE gen_op_def megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT}) +endif() if (MGE_WITH_DISTRIBUTED) message("Imperative configured to link megray") target_link_libraries(${MODULE_NAME} PRIVATE megray) @@ -91,6 +95,10 @@ set_target_properties(${MODULE_NAME} PROPERTIES SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX} LIBRARY_OUTPUT_DIRECTORY ${MEGENGINE_DIR}/${PACKAGE_NAME}/core ) +if (APPLE OR MSVC OR WIN32) + message("-- overwriting SUFFIX at macos and windows before config by set_target_properties") + pybind11_extension(${MODULE_NAME}) +endif() add_dependencies(${MODULE_NAME} gen_opr_py _version_ld) if(MGE_WITH_TEST AND MGE_ENABLE_RTTI) diff --git a/imperative/python/megengine/__init__.py b/imperative/python/megengine/__init__.py index f27cdc7270dfb0dd99f640611906e4b0d7a03757..ef9b67d604df795a4a7580fe76b1f31eccebb419 100644 --- a/imperative/python/megengine/__init__.py +++ 
b/imperative/python/megengine/__init__.py @@ -8,6 +8,67 @@ # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. import os import sys +import platform +import ctypes + +if sys.platform == "win32": + lib_path = os.path.join(os.path.dirname(__file__), "core/lib") + dll_paths = list(filter(os.path.exists, [lib_path,])) + assert len(dll_paths) > 0 + + kernel32 = ctypes.WinDLL("kernel32.dll", use_last_error=True) + has_load_library_attr = hasattr(kernel32, "AddDllDirectory") + old_error_mode = kernel32.SetErrorMode(0x0001) + + kernel32.LoadLibraryW.restype = ctypes.c_void_p + if has_load_library_attr: + kernel32.AddDllDirectory.restype = ctypes.c_void_p + kernel32.LoadLibraryExW.restype = ctypes.c_void_p + + for dll_path in dll_paths: + if sys.version_info >= (3, 8): + os.add_dll_directory(dll_path) + elif has_load_library_attr: + res = kernel32.AddDllDirectory(dll_path) + if res is None: + err = ctypes.WinError(ctypes.get_last_error()) + err.strerror += ' Error adding "{}" to the DLL search PATH.'.format( + dll_path + ) + raise err + else: + print("WARN: python or OS env have some issue, may load DLL failed!!!") + + import glob + + dlls = glob.glob(os.path.join(lib_path, "*.dll")) + path_patched = False + for dll in dlls: + is_loaded = False + if has_load_library_attr: + res = kernel32.LoadLibraryExW(dll, None, 0x00001100) + last_error = ctypes.get_last_error() + if res is None and last_error != 126: + err = ctypes.WinError(last_error) + err.strerror += ' Error loading "{}" or one of its dependencies.'.format( + dll + ) + raise err + elif res is not None: + is_loaded = True + if not is_loaded: + if not path_patched: + os.environ["PATH"] = ";".join(dll_paths + [os.environ["PATH"]]) + path_patched = True + res = kernel32.LoadLibraryW(dll) + if res is None: + err = ctypes.WinError(ctypes.get_last_error()) + err.strerror += ' Error loading "{}" or one of its dependencies.'.format( + dll + ) + raise err + + 
kernel32.SetErrorMode(old_error_mode) from .core._imperative_rt.utils import _set_fork_exec_path_for_timed_func from .device import * diff --git a/imperative/python/megengine/utils/max_recursion_limit.py b/imperative/python/megengine/utils/max_recursion_limit.py index 0870b7fa0e48bff3bc53aa98d2206ae81b1d2aaa..d7bce6e8b493d2c0e89948d521c905209665a2e8 100644 --- a/imperative/python/megengine/utils/max_recursion_limit.py +++ b/imperative/python/megengine/utils/max_recursion_limit.py @@ -6,10 +6,14 @@ # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -import resource +import platform import sys import threading +# Windows do not imp resource package +if platform.system() != "Windows": + import resource + class AlternativeRecursionLimit: r"""A reentrant context manager for setting global recursion limits. @@ -28,16 +32,24 @@ class AlternativeRecursionLimit: with self.lock: if self.count == 0: self.orig_py_limit = sys.getrecursionlimit() + if platform.system() != "Windows": ( self.orig_rlim_stack_soft, self.orig_rlim_stack_hard, ) = resource.getrlimit(resource.RLIMIT_STACK) - resource.setrlimit( - resource.RLIMIT_STACK, - (self.orig_rlim_stack_hard, self.orig_rlim_stack_hard), - ) - # increase recursion limit - sys.setrecursionlimit(self.new_py_limit) + # FIXME: https://bugs.python.org/issue34602, python3 release version + # on Macos always have this issue, not all user install python3 from src + try: + resource.setrlimit( + resource.RLIMIT_STACK, + (self.orig_rlim_stack_hard, self.orig_rlim_stack_hard), + ) + except ValueError as exc: + if platform.system() != "Darwin": + raise exc + + # increase recursion limit + sys.setrecursionlimit(self.new_py_limit) self.count += 1 def __exit__(self, type, value, traceback): @@ -45,10 +57,16 @@ class AlternativeRecursionLimit: self.count -= 1 if self.count == 0: 
sys.setrecursionlimit(self.orig_py_limit) - resource.setrlimit( - resource.RLIMIT_STACK, - (self.orig_rlim_stack_soft, self.orig_rlim_stack_hard), - ) + + if platform.system() != "Windows": + try: + resource.setrlimit( + resource.RLIMIT_STACK, + (self.orig_rlim_stack_soft, self.orig_rlim_stack_hard), + ) + except ValueError as exc: + if platform.system() != "Darwin": + raise exc _max_recursion_limit_context_manager = AlternativeRecursionLimit(2 ** 31 - 1) diff --git a/imperative/python/setup.py b/imperative/python/setup.py index e583cce4412f89331ef8de62eb1455b46f75a524..c788b75c93b38c44159025c712108918e484616b 100644 --- a/imperative/python/setup.py +++ b/imperative/python/setup.py @@ -9,6 +9,7 @@ import os import re import pathlib +import platform from distutils.file_util import copy_file from setuptools import setup, find_packages, Extension from setuptools.command.build_ext import build_ext as _build_ext @@ -29,7 +30,10 @@ class build_ext(_build_ext): extdir.parent.mkdir(parents=True, exist_ok=True) modpath = self.get_ext_fullname(ext.name).split('.') - modpath[-1] += '.so' + if platform.system() == 'Windows': + modpath[-1] += '.pyd' + else: + modpath[-1] += '.so' modpath = str(pathlib.Path(*modpath).resolve()) copy_file(modpath, fullpath, verbose=self.verbose, dry_run=self.dry_run) @@ -47,6 +51,14 @@ if local_version: __version__ = '{}+{}'.format(__version__, local_version) packages = find_packages(exclude=['test']) +package_data = [ + str(f.relative_to('megengine')) + for f in pathlib.Path('megengine', 'core', 'include').glob('**/*') +] +package_data += [ + str(f.relative_to('megengine')) + for f in pathlib.Path('megengine', 'core', 'lib').glob('**/*') +] with open('requires.txt') as f: requires = f.read().splitlines() @@ -63,6 +75,9 @@ setup_kwargs = dict( author='Megvii Engine Team', author_email=email, packages=packages, + package_data={ + 'megengine': package_data, + }, ext_modules=[PrecompiledExtesion('megengine.core._imperative_rt')], 
install_requires=requires, extras_require={ diff --git a/imperative/python/src/helper.cpp b/imperative/python/src/helper.cpp index a1b8b27759e7b3873d60223023fa84b1049dd687..13c16099c218dd9693a02f43539a24863d7126cd 100644 --- a/imperative/python/src/helper.cpp +++ b/imperative/python/src/helper.cpp @@ -9,15 +9,6 @@ #include "megbrain/utils/mempool.h" #include "./numpy_dtypes.h" -/* - * demangle typeid, see - * http://stackoverflow.com/questions/281818/unmangling-the-result-of-stdtype-infoname - */ -#ifdef __GNUG__ -#include -#include -#include - namespace py = pybind11; PyTaskDipatcher py_task_q = {}; @@ -34,10 +25,18 @@ py::module rel_import(py::str name, py::module m, int level) { return import(name, m.attr("__dict__"), py::arg("level")=level); } +/* + * demangle typeid, see + * http://stackoverflow.com/questions/281818/unmangling-the-result-of-stdtype-infoname + */ +#ifdef __GNUG__ +#include +#include +#include + namespace { std::string demangle_typeid(const char* name) { - int status = -4; // some arbitrary value to eliminate the compiler warning // enable c++11 by passing the flag -std=c++11 to g++ @@ -48,7 +47,7 @@ std::string demangle_typeid(const char* name) { return (status==0) ? 
res.get() : name ; } -} +} // namespace #else namespace { diff --git a/imperative/python/src/utils.cpp b/imperative/python/src/utils.cpp index b0e615a437e63d9aade2a1d17f2c1bc56ec0aa8b..3d4548a2747628594f19f0a17ca2a080d033ba9e 100644 --- a/imperative/python/src/utils.cpp +++ b/imperative/python/src/utils.cpp @@ -1,4 +1,8 @@ #include "utils.h" +#ifdef WIN32 +#include +#include +#endif #include #include diff --git a/imperative/python/test/integration/test_dp_correctness.py b/imperative/python/test/integration/test_dp_correctness.py index 5719136942cced84a8e17f0bc0351f1b5d5c618c..b706adb6a1914fbf79d852f123bc4ca7ac598d35 100644 --- a/imperative/python/test/integration/test_dp_correctness.py +++ b/imperative/python/test/integration/test_dp_correctness.py @@ -8,6 +8,7 @@ # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. import multiprocessing as mp import os +import platform import re import subprocess import sys @@ -196,6 +197,9 @@ def run_test( @pytest.mark.isolated_distributed +@pytest.mark.skipif( + platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" +) def test_dp_correctness(): model_name = "mnist_model_with_test.mge" model_path = os.path.join(os.path.dirname(__file__), model_name) diff --git a/imperative/python/test/unit/functional/test_distributed.py b/imperative/python/test/unit/functional/test_distributed.py index 9ff2031907b51240faccb2ea30dd23619bb88d41..70b30fb28ae20258363c729446f4fc3592922ee7 100644 --- a/imperative/python/test/unit/functional/test_distributed.py +++ b/imperative/python/test/unit/functional/test_distributed.py @@ -35,7 +35,7 @@ from megengine.functional.distributed import ( platform.system() == "Darwin", reason="do not imp GPU mode at macos now" ) @pytest.mark.skipif( - platform.system() == "Windows", reason="do not imp GPU mode at Windows now" + platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" ) @pytest.mark.isolated_distributed def test_reduce_sum(): @@ 
-77,7 +77,7 @@ def test_reduce_sum(): platform.system() == "Darwin", reason="do not imp GPU mode at macos now" ) @pytest.mark.skipif( - platform.system() == "Windows", reason="do not imp GPU mode at Windows now" + platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" ) @pytest.mark.isolated_distributed def test_broadcast(): @@ -115,7 +115,7 @@ def test_broadcast(): platform.system() == "Darwin", reason="do not imp GPU mode at macos now" ) @pytest.mark.skipif( - platform.system() == "Windows", reason="do not imp GPU mode at Windows now" + platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" ) @pytest.mark.isolated_distributed def test_all_gather(): @@ -154,7 +154,7 @@ def test_all_gather(): platform.system() == "Darwin", reason="do not imp GPU mode at macos now" ) @pytest.mark.skipif( - platform.system() == "Windows", reason="do not imp GPU mode at Windows now" + platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" ) @pytest.mark.isolated_distributed def test_reduce_scatter_sum(): @@ -193,7 +193,7 @@ def test_reduce_scatter_sum(): platform.system() == "Darwin", reason="do not imp GPU mode at macos now" ) @pytest.mark.skipif( - platform.system() == "Windows", reason="do not imp GPU mode at Windows now" + platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" ) @pytest.mark.isolated_distributed def test_all_reduce_sum(): @@ -232,7 +232,7 @@ def test_all_reduce_sum(): platform.system() == "Darwin", reason="do not imp GPU mode at macos now" ) @pytest.mark.skipif( - platform.system() == "Windows", reason="do not imp GPU mode at Windows now" + platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" ) @pytest.mark.isolated_distributed def test_all_reduce_max(): @@ -271,7 +271,7 @@ def test_all_reduce_max(): platform.system() == "Darwin", reason="do not imp GPU mode at macos now" ) @pytest.mark.skipif( - platform.system() == "Windows", reason="do not imp GPU mode at 
Windows now" + platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" ) @pytest.mark.isolated_distributed def test_all_reduce_min(): @@ -310,7 +310,7 @@ def test_all_reduce_min(): platform.system() == "Darwin", reason="do not imp GPU mode at macos now" ) @pytest.mark.skipif( - platform.system() == "Windows", reason="do not imp GPU mode at Windows now" + platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" ) @pytest.mark.isolated_distributed def test_gather(): @@ -352,7 +352,7 @@ def test_gather(): platform.system() == "Darwin", reason="do not imp GPU mode at macos now" ) @pytest.mark.skipif( - platform.system() == "Windows", reason="do not imp GPU mode at Windows now" + platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" ) @pytest.mark.isolated_distributed def test_scatter(): @@ -390,7 +390,7 @@ def test_scatter(): platform.system() == "Darwin", reason="do not imp GPU mode at macos now" ) @pytest.mark.skipif( - platform.system() == "Windows", reason="do not imp GPU mode at Windows now" + platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" ) @pytest.mark.isolated_distributed def test_all_to_all(): @@ -430,7 +430,7 @@ def test_all_to_all(): platform.system() == "Darwin", reason="do not imp GPU mode at macos now" ) @pytest.mark.skipif( - platform.system() == "Windows", reason="do not imp GPU mode at Windows now" + platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" ) @pytest.mark.isolated_distributed def test_io_remote(): diff --git a/imperative/python/test/unit/test_autodiff.py b/imperative/python/test/unit/test_autodiff.py index 929e967cae28069bc287dcaae5159160d4b61d2d..85b60e82aff48d7dec099038363a3088b7cd765e 100644 --- a/imperative/python/test/unit/test_autodiff.py +++ b/imperative/python/test/unit/test_autodiff.py @@ -6,6 +6,7 @@ # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # 
"AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +import platform import weakref import numpy as np @@ -51,6 +52,9 @@ def save_to(self, name="grad"): @pytest.mark.isolated_distributed +@pytest.mark.skipif( + platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" +) def test_dist_grad(): world_size = 2 x_np = np.random.rand(10).astype("float32") diff --git a/imperative/src/impl/profiler.cpp b/imperative/src/impl/profiler.cpp index f35f5b3c9232c276ac3b0db93308e66ea5833ba0..623522ac47976b20dd899c160a76c6ce1b8cefea 100644 --- a/imperative/src/impl/profiler.cpp +++ b/imperative/src/impl/profiler.cpp @@ -9,7 +9,17 @@ #include "megbrain/imperative/profiler.h" +#if defined(_MSC_VER) || defined(WIN32) +#include +#define getpid GetCurrentProcessId +#else #include +#endif + +#if defined(__APPLE__) || defined(__MACOSX) +#include +#endif + #include #include "megbrain/imperative/ops/opr_attr.h" diff --git a/imperative/src/impl/proxy_graph.cpp b/imperative/src/impl/proxy_graph.cpp index b750749ff921e5293b838a0b31b0021f53892bd4..7b0409f3decfa78b04c244e2aa29bcd73b20caa5 100644 --- a/imperative/src/impl/proxy_graph.cpp +++ b/imperative/src/impl/proxy_graph.cpp @@ -16,6 +16,10 @@ #include "megbrain/imperative/ops/opr_attr.h" #include "megbrain/imperative/ops/backward_graph.h" +#if __cplusplus >= 201703L +#include +#endif + namespace mgb { namespace imperative { diff --git a/imperative/test/CMakeLists.txt b/imperative/test/CMakeLists.txt index 6b766cddce55b0893d49907bfa010d159d662fb7..7e50124a30927a35092010333b9b5a287e5ae55e 100644 --- a/imperative/test/CMakeLists.txt +++ b/imperative/test/CMakeLists.txt @@ -38,8 +38,11 @@ if(CXX_SUPPORT_WCLASS_MEMACCESS) endif() if(UNIX) - target_link_libraries(imperative_test dl rt) + if(APPLE OR ANDROID) + target_link_libraries(imperative_test dl) + else() + target_link_libraries(imperative_test dl rt) + endif() endif() - install(TARGETS imperative_test RUNTIME DESTINATION test) diff --git 
a/python_module/CMakeLists.txt b/python_module/CMakeLists.txt index c64b520d234b98995fe89ae6783b712631ecd81d..e23c2488da168c501bdbab4dc0d2ade4ee19ed3c 100644 --- a/python_module/CMakeLists.txt +++ b/python_module/CMakeLists.txt @@ -81,7 +81,10 @@ else() target_link_libraries(mgb megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT}) endif() target_include_directories(mgb PRIVATE ${PYTHON_INCLUDE_DIRS} src/cpp ${CMAKE_CURRENT_BINARY_DIR} ${NUMPY_INCLUDE_DIR}) -target_link_libraries(mgb ${PYTHON_LIBRARIES}) +# only Windows needs to link against PYTHON_LIBRARIES +if(MSVC OR WIN32) + target_link_libraries(mgb ${PYTHON_LIBRARIES}) +endif() if (MGE_WITH_DISTRIBUTED) target_link_libraries(mgb megray) diff --git a/scripts/cmake-build/BUILD_README.md b/scripts/cmake-build/BUILD_README.md index f9c70a510254655e776d71f0d19f606305a20318..457adcd667ded01b5543bcf10e9e2e2bee43c868 100644 --- a/scripts/cmake-build/BUILD_README.md +++ b/scripts/cmake-build/BUILD_README.md @@ -30,11 +30,17 @@ 4e: add C:\Program Files\NVIDIA GPU Computing Toolkit\cudnn-10.1-windows10-x64-v7.6.5.32\cuda\bin to system Path env 4f: add C:\Program Files\NVIDIA GPU Computing Toolkit\TensorRT-6.0.1.5\lib Path if u do not do 4d/4e/4f, CUDA runtime can not find dll + 5: install python3 (default: 3.8.3) to /c/Users/${USER}/mge_whl_python_env/3.8.3, + add it to the PATH env, then run python3 -m pip install numpy (only needed to build with training mode or to build the python whl) + 6: install swig from install gui (only needed to build with training mode or to build the python whl) + a: download swig: https://nchc.dl.sourceforge.net/project/swig/swigwin/swigwin-4.0.2/swigwin-4.0.2.zip + b: install swig to /c/Users/${USER}/swigwin-4.0.2 + c: apply scripts/whl/windows/fix-ptr-define-issue.patch to /c/Users/${USER}/swigwin-4.0.2 ``` ### linux host build ``` 1: cmake, which version > 3.14.4 - 2: gcc/g++, which version > 6 + 2: gcc/g++, which version > 6 (gcc/g++ >= 7 is needed to build with training mode) 3: install build-essential git git-lfs gfortran 
libgfortran-6-dev autoconf gnupg flex bison gperf curl 4: zlib1g-dev gcc-multilib g++-multilib lib32ncurses5-dev libxml2-utils xsltproc unzip libtool: 5: librdmacm-dev rdmacm-utils python3-dev swig python3-numpy texinfo @@ -47,6 +53,7 @@ 3: brew install python python3 swig coreutils 4: install at least xcode command line tool: https://developer.apple.com/xcode/ 5: about cuda: we do not support CUDA on macos + 6: python3 -m pip install numpy (if u want to build with training mode or build python whl) ``` ### cross build for arm-android now we support windows/linux/macos cross build to arm-android diff --git a/scripts/cmake-build/host_build.sh b/scripts/cmake-build/host_build.sh index 78e1562e3b817855bd0b076d647de061eff665d2..8a8f1508fb9f7490301d22688fc65b26f36b40ac 100755 --- a/scripts/cmake-build/host_build.sh +++ b/scripts/cmake-build/host_build.sh @@ -9,6 +9,7 @@ function usage() { echo "-t : Build with training mode, default inference only" echo "-m : Build with m32 mode(only for windows build), default m64" echo "-r : remove old build dir before make, default off" + echo "-n : enable new python runtime(valid when training mode with -t, default is legacy runtime)" echo "-h : show usage" echo "append other cmake config by export EXTRA_CMAKE_ARGS=..." echo "example: $0 -d" @@ -22,9 +23,10 @@ MGE_WINDOWS_BUILD_ARCH=x64 MGE_WINDOWS_BUILD_MARCH=m64 MGE_ARCH=x86_64 REMOVE_OLD_BUILD=false +MGE_BUILD_IMPERATIVE_RT=OFF echo "EXTRA_CMAKE_ARGS: ${EXTRA_CMAKE_ARGS}" -while getopts "rhdctm" arg +while getopts "rhdctmn" arg do case $arg in d) @@ -48,11 +50,15 @@ do REMOVE_OLD_BUILD=true ;; m) - echo "build for m32(only use for windows)" + echo "build for m32(only valid use for windows)" MGE_WINDOWS_BUILD_ARCH=x86 MGE_WINDOWS_BUILD_MARCH=m32 MGE_ARCH=i386 ;; + n) + echo "Enable imperative python wrapper runtime" + MGE_BUILD_IMPERATIVE_RT=ON + ;; ?) 
echo "unkonw argument" usage @@ -101,6 +107,7 @@ function cmake_build() { cmake \ -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ -DMGE_INFERENCE_ONLY=$MGE_INFERENCE_ONLY \ + -DMGE_BUILD_IMPERATIVE_RT=${MGE_BUILD_IMPERATIVE_RT} \ -DMGE_WITH_CUDA=$MGE_WITH_CUDA \ -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \ ${EXTRA_CMAKE_ARGS} \ @@ -112,7 +119,7 @@ function cmake_build() { function windows_env_err() { echo "check windows env failed!!" - echo "please install LLVM/clang-cl/cmake/python at Visual Studio Extensions" + echo "please install env refs for: scripts/cmake-build/BUILD_README.md" exit -1 } @@ -178,6 +185,25 @@ function prepare_env_for_windows_build() { export CPATH=$CPATH:$NIVIDA_INSTALL_PRE/${TRT_V}/include:$NIVIDA_INSTALL_PRE/CUDA/${CUDA_V}/include:$NIVIDA_INSTALL_PRE/CUDA/${CUDA_V}/include/nvtx3:$PC_CUDNN_INCLUDE_DIRS export LIBRARY_PATH=$LIBRARY_PATH:$LD_LIBRARY_PATH export INCLUDE=$INCLUDE:$CPATH + + # python version will be config by whl build script or ci script, we need + # a DFT version for build success when we just call host_build.sh + if [[ -z ${ALREADY_CONFIG_PYTHON_VER} ]] + then + echo "config a default python3" + DFT_PYTHON_BIN=/c/Users/${USER}/mge_whl_python_env/3.8.3 + if [ ! -f "${DFT_PYTHON_BIN}/python3.exe" ]; then + echo "ERR: can not find ${DFT_PYTHON_BIN}/python3.exe , Invalid env" + windows_env_err + else + echo "put python3 to env..." 
+ export PATH=${DFT_PYTHON_BIN}:$PATH + which python3 + fi + fi + + echo "export swig pwd to PATH" + export PATH=/c/Users/${USER}/swigwin-4.0.2:$PATH } WINDOWS_BUILD_TARGET="Ninja all > build.log" @@ -218,6 +244,7 @@ function cmake_build_windows() { vcvarsall.bat $MGE_WINDOWS_BUILD_ARCH && cmake -G "Ninja" \ -DMGE_ARCH=$MGE_ARCH \ -DMGE_INFERENCE_ONLY=$MGE_INFERENCE_ONLY \ + -DMGE_BUILD_IMPERATIVE_RT=${MGE_BUILD_IMPERATIVE_RT} \ -DMGE_WITH_CUDA=$MGE_WITH_CUDA \ -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ -DCMAKE_INSTALL_PREFIX:PATH=$INSTALL_DIR \ @@ -230,8 +257,18 @@ function cmake_build_windows() { ${WINDOWS_BUILD_TARGET}" } +if [ ${MGE_BUILD_IMPERATIVE_RT} = "ON" ] && [ ${MGE_INFERENCE_ONLY} = "ON" ]; then + echo "ERR: MGE_BUILD_IMPERATIVE_RT(-n) only valid when enable training mode(-t)" + echo "pls remove -n or add -t" + exit -1 +fi if [[ $OS =~ "NT" ]]; then + if [ ${MGE_ARCH} = "i386" ] && [ ${MGE_INFERENCE_ONLY} = "OFF" ]; then + echo "ERR: training mode(-t) only support 64 bit mode" + echo "pls remove -t or remove -m" + exit -1 + fi config_windows_build_target cmake_build_windows $MGE_WITH_CUDA $MGE_INFERENCE_ONLY $BUILD_TYPE else diff --git a/scripts/whl/BUILD_PYTHON_WHL_README.md b/scripts/whl/BUILD_PYTHON_WHL_README.md index 5636560414bff09a6e69360c48c082d9892ed241..07e55febded09009d84e169812a0298211ce78d5 100644 --- a/scripts/whl/BUILD_PYTHON_WHL_README.md +++ b/scripts/whl/BUILD_PYTHON_WHL_README.md @@ -53,10 +53,6 @@ d0: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install --upgrade pip d1: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install -r python_module/requires-test.txt d2: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install numpy wheel requests tqdm tabulate - 5: install swig from install gui - a: download swig: https://nchc.dl.sourceforge.net/project/swig/swigwin/swigwin-4.0.2/swigwin-4.0.2.zip - b: install swig to /c/Users/${USER}/swigwin-4.0.2 - c: apply scripts/whl/windows/fix-ptr-define-issue.patch to 
c/Users/${USER}/swigwin-4.0.2 ``` # how to build @@ -90,6 +86,11 @@ ``` ALL_PYTHON=3.5.9 ./scripts/whl/macos/macos_build_whl.sh ``` + If you want to build with the imperative runtime, set the environment variable BUILD_IMPERATIVE="ON", e.g.: + + ``` + ALL_PYTHON=3.5.9 BUILD_IMPERATIVE="ON" ./scripts/whl/macos/macos_build_whl.sh + ``` ## build for windows ``` ./scripts/whl/windows/windows_build_whl.sh @@ -102,5 +103,7 @@ If you want to build windows whl with cuda, also a specific Python verison. eg: ``` - WINDOWS_WHL_WITH_CUDA="true" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh + WINDOWS_WHL_WITH_CUDA="ON" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh ``` + If you want to build with the imperative runtime, set the environment variable BUILD_IMPERATIVE="ON", e.g.: + BUILD_IMPERATIVE="ON" WINDOWS_WHL_WITH_CUDA="ON" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh diff --git a/scripts/whl/macos/macos_build_whl.sh b/scripts/whl/macos/macos_build_whl.sh index b3d1a70ec894fabc5daf71cd3b463d94f9e1e945..a10912ddf668a605e1422d75f445585809624d51 100755 --- a/scripts/whl/macos/macos_build_whl.sh +++ b/scripts/whl/macos/macos_build_whl.sh @@ -65,16 +65,18 @@ function config_python_env() { fi echo ${ver} - #config a dir to trick cmake find a null pythonlib - PYTHON_LIBRARY=${PYTHON_DIR}lib/ if [ "$1" = "3.5.9" ]; then PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.5m + PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.5m.dylib elif [ "$1" = "3.6.10" ]; then PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.6m + PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.6m.dylib elif [ "$1" = "3.7.7" ]; then PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.7m + PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.7m.dylib elif [ "$1" = "3.8.3" ]; then PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.8 + PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.8.dylib else echo "ERR: DO NOT SUPPORT PYTHON VERSION" echo "now support list: ${FULL_PYTHON_VER}" @@ -82,6 +84,11 @@ function config_python_env() { fi } +if [[ -z ${BUILD_IMPERATIVE} ]] +then 
+ BUILD_IMPERATIVE="OFF" +fi + function do_build() { for ver in ${ALL_PYTHON} do @@ -89,7 +96,7 @@ function do_build() { config_python_env ${ver} #check env - if [ ! -d "$PYTHON_LIBRARY" ]; then + if [ ! -f "$PYTHON_LIBRARY" ]; then echo "ERR: can not find $PYTHON_LIBRARY , Invalid python package" err_env fi @@ -102,14 +109,20 @@ function do_build() { #append cmake args for config python export EXTRA_CMAKE_ARGS="-DCMAKE_PREFIX_PATH=${PYTHON_DIR} -DPYTHON_LIBRARY=${PYTHON_LIBRARY} -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR} " #config build type to RelWithDebInfo to enable MGB_ENABLE_DEBUG_UTIL etc - export EXTRA_CMAKE_ARGS=${EXTRA_CMAKE_ARGS}" -DCMAKE_BUILD_TYPE=RelWithDebInfo " + export EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCMAKE_BUILD_TYPE=RelWithDebInfo " #call build and install #FIXME: cmake do not triger update python config, after #change PYTHON_LIBRARY and PYTHON_INCLUDE_DIR, so add #-r to remove build cache after a new ver build, which #will be more slow build than without -r - ${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r + if [ ${BUILD_IMPERATIVE} = "ON" ]; then + echo "build whl with IMPERATIVE python rt" + ${SRC_DIR}/scripts/cmake-build/host_build.sh -t -n -r + else + echo "build whl with legacy python rt" + ${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r + fi #call setup.py BUILD_DIR=${SRC_DIR}/build_dir/host/MGE_WITH_CUDA_OFF/MGE_INFERENCE_ONLY_OFF/Release/build/ @@ -121,12 +134,47 @@ function do_build() { fi mkdir -p staging + if [ ${BUILD_IMPERATIVE} = "ON" ]; then + echo "build whl with IMPERATIVE python rt" + cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ + cd ${BUILD_DIR}/staging/megengine/core + rt_file=`ls _imperative_rt.*.so` + echo "rt file is: ${rt_file}" + if [[ -z ${rt_file} ]] + then + echo "ERR: can not find valid rt file" + exit -1 + fi + llvm-strip -s ${rt_file} + mv ${rt_file} _imperative_rt.so + echo "check so valid or not..." 
+ otool_out=`otool -L _imperative_rt.so` + if [[ "${otool_out}" =~ "ython" ]]; then + echo "ERR: invalid _imperative_rt.so which depend on python lib, detail: log" + echo ${otool_out} + exit -1 + else + echo "valid..." + fi + else + echo "build whl with legacy python rt" + + cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ + cd ${BUILD_DIR}/staging/megengine/_internal + #FIXME: setting the lib suffix to dylib may be better, BUT we find that after distutils.file_util.copy_file + #the suffix is changed to .so on macos even if we set it to dylib; at the same time, macos also supports .so + echo "check so valid or not..." + llvm-strip -s _mgb.so + otool_out=`otool -L _mgb.so` + if [[ "${otool_out}" =~ "ython" ]]; then + echo "ERR: invalid _mgb.so which depend on python lib, detail: log" + echo ${otool_out} + exit -1 + else + echo "valid..." + fi + fi - cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ - cd ${BUILD_DIR}/staging/megengine/_internal - #FIXME: set lib suffix to dylib may be better, BUT we find after distutils.file_util.copy_file - #will change to .so at macos even we set suffix to dylib, at the same time, macos also support .so - llvm-strip -s _mgb.so cd ${BUILD_DIR}/staging ${PYTHON_DIR}/bin/python3 setup.py bdist_wheel cd ${BUILD_DIR}/staging/dist/ diff --git a/scripts/whl/windows/windows_build_whl.sh b/scripts/whl/windows/windows_build_whl.sh index 434f3ed5c099ec51852720375f01b8c98265b063..1e1d553ad9d42359d732caa10bef93e8cbab6d9a 100755 --- a/scripts/whl/windows/windows_build_whl.sh +++ b/scripts/whl/windows/windows_build_whl.sh @@ -14,8 +14,6 @@ function err_env() { } function append_path_env_and_check() { - echo "export swig pwd to PATH" - export PATH=/c/Users/${USER}/swigwin-4.0.2::$PATH echo "export vs2019 install path" export VS_PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2019/Enterprise # for llvm-strip @@ -62,7 +60,7 @@ function config_python_env() { if [[ 
-z ${WINDOWS_WHL_WITH_CUDA} ]] then - WINDOWS_WHL_WITH_CUDA="false" + WINDOWS_WHL_WITH_CUDA="OFF" fi @@ -74,26 +72,46 @@ CUBLAS_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublas6 CURAND_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/curand64_10.dll" CUBLASLT_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublasLt64_10.dll" CUDART_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cudart64_101.dll" +function depend_real_copy() { + REAL_DST=$1 + echo "real copy lib to $1" + cp "${TRT_LIB}" ${REAL_DST} + cp "${CUDNN_LIB}" ${REAL_DST} + cp "${CUSOLVER_LIB}" ${REAL_DST} + cp "${CUBLAS_LIB}" ${REAL_DST} + cp "${CURAND_LIB}" ${REAL_DST} + cp "${CUBLASLT_LIB}" ${REAL_DST} + cp "${CUDART_LIB}" ${REAL_DST} +} + function copy_more_dll() { # for python whl real use - CP_DST=${BUILD_DIR}/staging/megengine/_internal/lib - rm -rf ${CP_DST} - mkdir ${CP_DST} + if [ ${BUILD_IMPERATIVE} = "ON" ]; then + echo "config BUILD_IMPERATIVE core lib dir" + CP_WHL_DST=${BUILD_DIR}/staging/megengine/core/lib + else + echo "config legacy python lib dir" + CP_WHL_DST=${BUILD_DIR}/staging/megengine/_internal/lib + fi + rm -rf ${CP_WHL_DST} + mkdir ${CP_WHL_DST} + # workaround for the cpu-only version import failure: use an + # empty.file to trigger setup.py to create the otherwise empty lib dir + echo "empty" > ${CP_WHL_DST}/empty.file - if [ ${WINDOWS_WHL_WITH_CUDA} = "true" ]; then + if [ ${WINDOWS_WHL_WITH_CUDA} = "ON" ]; then echo "copy nvidia lib to whl use...." 
- cp "${TRT_LIB}" ${CP_DST} - cp "${CUDNN_LIB}" ${CP_DST} - cp "${CUSOLVER_LIB}" ${CP_DST} - cp "${CUBLAS_LIB}" ${CP_DST} - cp "${CURAND_LIB}" ${CP_DST} - cp "${CUBLASLT_LIB}" ${CP_DST} - cp "${CUDART_LIB}" ${CP_DST} + depend_real_copy ${CP_WHL_DST} fi } +if [[ -z ${BUILD_IMPERATIVE} ]] +then + BUILD_IMPERATIVE="OFF" +fi + function do_build() { for ver in ${ALL_PYTHON} do @@ -118,21 +136,31 @@ function do_build() { #force LINK a real PYTHON_LIBRARY file, after test we do not find the symbols conflict with python #export EXTRA_CMAKE_ARGS="-DPYTHON_LIBRARY=${PYTHON_LIBRARY} -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR} " #config build type to RelWithDebInfo to enable MGB_ENABLE_DEBUG_UTIL etc - export EXTRA_CMAKE_ARGS=${EXTRA_CMAKE_ARGS}" -DCMAKE_BUILD_TYPE=RelWithDebInfo " + export EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCMAKE_BUILD_TYPE=RelWithDebInfo " #call build and install #FIXME: cmake do not triger update python config, after #change PYTHON_LIBRARY and PYTHON_INCLUDE_DIR, so add #-r to remove build cache after a new ver build, which #will be more slow build than without -r - if [ ${WINDOWS_WHL_WITH_CUDA} = "true" ]; then + BUILD_ARGS=" -t -r" + if [ ${BUILD_IMPERATIVE} = "ON" ]; then + echo "build whl with IMPERATIVE python rt" + BUILD_ARGS="${BUILD_ARGS} -n " + else + echo "build whl with legacy python rt" + fi + + if [ ${WINDOWS_WHL_WITH_CUDA} = "ON" ]; then echo "build windows whl with cuda" - ${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r -c + BUILD_ARGS="${BUILD_ARGS} -c " else echo "build windows whl with cpu only" - ${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r fi + echo "host_build.sh BUILD_ARGS: ${BUILD_ARGS}" + ${SRC_DIR}/scripts/cmake-build/host_build.sh ${BUILD_ARGS} + #call setup.py BUILD_DIR=${SRC_DIR}/build_dir/host/build/ cd ${BUILD_DIR} @@ -143,10 +171,27 @@ function do_build() { fi mkdir -p staging + if [ ${BUILD_IMPERATIVE} = "ON" ]; then + echo "build whl with IMPERATIVE python rt" + cp -a 
imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ + cd ${BUILD_DIR}/staging/megengine/core + rt_file=`ls _imperative_rt.*.pyd` + echo "rt file is: ${rt_file}" + if [[ -z ${rt_file} ]] + then + echo "ERR: can not find valid rt file" + exit -1 + fi + llvm-strip -s ${rt_file} + mv ${rt_file} _imperative_rt.pyd + else + echo "build whl with legacy python rt" + + cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ + cd ${BUILD_DIR}/staging/megengine/_internal + llvm-strip -s _mgb.pyd + fi - cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ - cd ${BUILD_DIR}/staging/megengine/_internal - llvm-strip -s _mgb.pyd copy_more_dll cd ${BUILD_DIR}/staging ${PYTHON_DIR}/python3 setup.py bdist_wheel @@ -175,5 +220,6 @@ function third_party_prepare() { } ###################### +export ALREADY_CONFIG_PYTHON_VER="yes" third_party_prepare do_build diff --git a/src/core/impl/graph/seq_sublinear_memory.cpp b/src/core/impl/graph/seq_sublinear_memory.cpp index 59750d80a9c318c0559d5911cd10772f446ca18e..04d89309c9ebff9123cf477409a4ac23dfc6a882 100644 --- a/src/core/impl/graph/seq_sublinear_memory.cpp +++ b/src/core/impl/graph/seq_sublinear_memory.cpp @@ -33,6 +33,11 @@ class RNGxorshf { uint64_t s[2]; public: +#if __cplusplus >= 201703L + typedef uint64_t result_type; + static constexpr uint64_t min() { return 0; } + static constexpr uint64_t max() { return UINT64_MAX; } +#endif RNGxorshf(uint64_t seed) { std::mt19937_64 gen(seed); s[0] = gen(); @@ -936,8 +941,12 @@ void SeqModifierForSublinearMemory::ActionSearcherSingleCN::search_genetic() { } } m_cur_records = records; +#if __cplusplus >= 201703L + std::shuffle(perm.begin(), perm.end(), rng); +#else std::random_shuffle(perm.begin(), perm.end(), [&](size_t x) { return rng() % x; }); +#endif for (size_t i = 0; i < length; ++i) { invoke_search(mutation(mutation(records[i].first))); 
invoke_search(crossover(records[i].first, records[perm[i]].first)); diff --git a/src/opr/test/blas.cpp b/src/opr/test/blas.cpp index dab00573b2be80bb404616a4205aa10922234fff..71c4fb2bbae00a04e82c1c13369beb97bb0d7e39 100644 --- a/src/opr/test/blas.cpp +++ b/src/opr/test/blas.cpp @@ -705,7 +705,12 @@ TEST(TestOprBlas, MatrixInverse) { } auto ptr = inp[0]->ptr(); for (size_t i = 0; i < batch; ++i, ptr += n * n) { +#if __cplusplus >= 201703L + std::default_random_engine rng_engine; + std::shuffle(perm.begin(), perm.end(), rng_engine); +#else std::random_shuffle(perm.begin(), perm.end()); +#endif for (size_t j = 0; j < n; ++j) { ptr[j * n + perm[j]] += 5; } diff --git a/src/opr/test/muxing.cpp b/src/opr/test/muxing.cpp index fdc09b02319f44e580d071e9036888d8a7e110e6..5fec111bf1a3e41866f4ad67e372fc21725124fd 100644 --- a/src/opr/test/muxing.cpp +++ b/src/opr/test/muxing.cpp @@ -36,7 +36,12 @@ void run_all_gather(const std::vector& axis_size, bool& success, sleep_time.push_back(i * 0.05 + 0.1); tot_axis_size += axis_size[i]; } +#if __cplusplus >= 201703L + std::default_random_engine rng_engine; + std::shuffle(sleep_time.begin(), sleep_time.end(), rng_engine); +#else std::random_shuffle(sleep_time.begin(), sleep_time.end()); +#endif auto constexpr DEVICE_TYPE = CompNode::DeviceType::CUDA; size_t nr_dev = std::min( diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 45cc3d690352318f9130aa6a919e29d1fc532473..4c76dfbf14c540625da001de1377228bd1d50580 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -18,7 +18,11 @@ endif() add_executable(megbrain_test ${SOURCES}) target_link_libraries(megbrain_test gtest) -target_link_libraries(megbrain_test megengine) +if(MSVC OR WIN32) + target_link_libraries(megbrain_test megbrain megdnn) +else() + target_link_libraries(megbrain_test megengine) +endif() if(CXX_SUPPORT_WCLASS_MEMACCESS) if(MGE_WITH_CUDA) target_compile_options(megbrain_test PRIVATE "$<$:-Xcompiler=-Wno-class-memaccess>" @@ -28,10 +32,12 @@ 
if(CXX_SUPPORT_WCLASS_MEMACCESS) endif() endif() -if(APPLE OR ANDROID) - target_link_libraries(megbrain_test dl) -else() - target_link_libraries(megbrain_test dl rt) +if(UNIX) + if(APPLE OR ANDROID) + target_link_libraries(megbrain_test dl) + else() + target_link_libraries(megbrain_test dl rt) + endif() endif() if (MGE_WITH_DISTRIBUTED)