提交 6e882c1a 编写于 作者: M Megvii Engine Team

feat(whl/imperative): compat for build python whl imperative and legacy runtime

GitOrigin-RevId: 7f6629ae1f84b4aec3a4211f22b1d8d18d36a1b7
上级 40d18c89
......@@ -697,8 +697,10 @@ endif()
# Build exactly one Python wrapper when the Python module is enabled: the
# `imperative` subdirectory if MGE_BUILD_IMPERATIVE_RT is set, otherwise the
# `python_module` subdirectory. The message() calls only report which one was chosen.
if(MGE_WITH_PYTHON_MODULE)
if(MGE_BUILD_IMPERATIVE_RT)
add_subdirectory(imperative)
message("-- Enable imperative python wrapper runtime")
else()
add_subdirectory(python_module)
message("-- Enable legacy python wrapper runtime")
endif()
endif()
......
......@@ -342,7 +342,11 @@ template <typename T>
struct SafeMultiplies;
template <typename T>
#if __cplusplus >= 201703L
struct _SafeMultipliesImplUnsigned {
#else
struct _SafeMultipliesImplUnsigned : public std::binary_function<T, T, T> {
#endif
static MEGDNN_CONSTEXPR size_t nbits = sizeof(T) * 8;
static size_t clz(unsigned x) {
......
......@@ -70,8 +70,10 @@ if (MEG_WITH_ROCM)
target_link_libraries (megdnn_test ${MGE_ROCM_LIBS})
endif ()
if(APPLE OR ANDROID)
target_link_libraries(megdnn_test dl)
else()
target_link_libraries(megdnn_test dl rt)
if(UNIX)
if(APPLE OR ANDROID)
target_link_libraries(megdnn_test dl)
else()
target_link_libraries(megdnn_test dl rt)
endif()
endif()
......@@ -89,7 +89,7 @@ public:
auto ptr = tensor.ptr<int>();
for (size_t n = 0; n < size; ++n) {
std::set<int> used;
std::random_shuffle(seq.begin(), seq.end());
COMPAT_RANDOM(seq.begin(), seq.end());
for (size_t step = 0; step < stride; ++step) {
megdnn_assert(used.size() < m_size);
ptr[n * stride + step] = seq[step];
......
......@@ -75,7 +75,7 @@ Float16PeriodicalRNG::Float16PeriodicalRNG() : m_offset(0) {
i2f.i = static_cast<uint16_t>(x);
m_sequence.push_back(i2f.f);
}
std::random_shuffle(m_sequence.begin(), m_sequence.end());
COMPAT_RANDOM(m_sequence.begin(), m_sequence.end());
}
Float16PeriodicalRNG::Float16PeriodicalRNG(size_t range) : m_offset(0) {
......@@ -99,7 +99,7 @@ Float16PeriodicalRNG::Float16PeriodicalRNG(size_t range) : m_offset(0) {
m_sequence.push_back(i2f.f);
}
std::random_shuffle(m_sequence.begin(), m_sequence.end());
COMPAT_RANDOM(m_sequence.begin(), m_sequence.end());
}
void Float16PeriodicalRNG::gen(const TensorND& tensor) {
......
......@@ -19,6 +19,16 @@
namespace megdnn {
namespace test {
/*!
 * \brief COMPAT_RANDOM(begin, end): randomly permute the range [begin, end)
 *
 * std::random_shuffle was removed in C++17, so from C++17 on the macro expands
 * to std::shuffle; older standards keep calling std::random_shuffle.
 *
 * The C++17 engine is a single default-seeded, thread-local object that lives
 * across calls. Constructing a fresh engine inside every expansion would apply
 * the very same permutation on each call (and identical permutations to two
 * equally sized ranges shuffled back to back), which defeats the purpose of
 * shuffling test data. Results stay reproducible from run to run because the
 * seed is the engine's default one.
 */
#if __cplusplus >= 201703L
//! engine shared by every COMPAT_RANDOM expansion on the calling thread
inline std::default_random_engine& compat_random_engine() {
    static thread_local std::default_random_engine engine;
    return engine;
}
#define COMPAT_RANDOM(begin, end)                         \
    {                                                     \
        std::shuffle(begin, end, compat_random_engine()); \
    }
#else
#define COMPAT_RANDOM(begin, end) std::random_shuffle(begin, end);
#endif
class RNG {
protected:
class RNGxorshf;
......
......@@ -24,15 +24,16 @@ class ArgmxxRNG final: public RNG {
void gen(const TensorND &tensor) override {
auto offset = tensor.layout.span().low_elem;
auto nr_elems = tensor.layout.span().dist_elem();
#define cb(DType) \
if (tensor.layout.dtype == DType()) { \
using ctype = typename DTypeTrait<DType>::ctype; \
auto ptr = tensor.ptr<ctype>(); \
for (size_t i = 0; i < nr_elems; ++i) { \
ptr[offset+i] = i; \
} \
std::random_shuffle(ptr + offset, ptr + offset + nr_elems); \
}
#define cb(DType) \
if (tensor.layout.dtype == DType()) { \
using ctype = typename DTypeTrait<DType>::ctype; \
auto ptr = tensor.ptr<ctype>(); \
for (size_t i = 0; i < nr_elems; ++i) { \
ptr[offset + i] = i; \
} \
COMPAT_RANDOM(ptr + offset, ptr + offset + nr_elems); \
}
MEGDNN_FOREACH_COMPUTING_DTYPE(cb);
#undef cb
}
......
......@@ -32,7 +32,7 @@ class ArgsortRNG final : public RNG {
} else {
for (int i = 0; i < n; ++i)
ptr[i] = static_cast<T>(i - n / 2);
std::random_shuffle(ptr, ptr + n);
COMPAT_RANDOM(ptr, ptr + n);
}
}
......@@ -86,7 +86,7 @@ void run_backward_test(Handle* handle, DType dtype) {
for (size_t j = 0; j < n; ++j) {
ptr[j] = j;
}
std::random_shuffle(ptr, ptr + n);
COMPAT_RANDOM(ptr, ptr + n);
ptr += n;
}
}
......
......@@ -361,9 +361,8 @@ TEST_F(CUDA, BENCHMARK_RELAYOUT_7) {
for (size_t r = 0; r < _dim.size(); r++)
permutation[r] = r;
for (int nsample = 0; nsample < 50; nsample++) {
std::random_shuffle(_dim.begin(), _dim.end());
std::random_shuffle(permutation.begin(), permutation.end());
COMPAT_RANDOM(_dim.begin(), _dim.end());
COMPAT_RANDOM(permutation.begin(), permutation.end());
if (!isTrivial(permutation)) {
run({{_dim[0], _dim[1], _dim[2], _dim[3], _dim[4], _dim[5],
_dim[6]},
......@@ -451,9 +450,10 @@ TEST_F(CUDA, BENCHMARK_RELAYOUT_5) {
printf("vol %d cur_ratio %lf | %lf\n", vol, cur_ratio, vol_re);
// printVec(dim);
std::random_shuffle(dim.begin(), dim.end());
COMPAT_RANDOM(dim.begin(), dim.end());
while (isTrivial(permutation)) {
std::random_shuffle(permutation.begin(), permutation.end());
COMPAT_RANDOM(permutation.begin(), permutation.end());
}
run({{dim[0], dim[1], dim[2], dim[3], dim[4]}, dtype::Int32()},
......@@ -603,8 +603,9 @@ TEST_F(CUDA, BENCHMARK_LAST_CONTIG_ALIGN_TEST) {
for (size_t r = 0; r < _dim.size(); r++)
permutation[r] = r;
for (int nsample = 0; nsample < 20; nsample++) {
std::random_shuffle(_dim.begin(), _dim.end() - 1);
std::random_shuffle(permutation.begin(), permutation.end() - 1);
COMPAT_RANDOM(_dim.begin(), _dim.end() - 1);
COMPAT_RANDOM(permutation.begin(), permutation.end() - 1);
if (nsample < 5)
_dim[5] = (u.gen_single_val() / 4 + 1) * 4;
......
......@@ -24,7 +24,7 @@ using namespace test;
TEST_F(CUDA, SLEEP) {
auto opr = this->handle_cuda()->create_operator<Sleep>();
auto opr = this->handle_cuda()->create_operator<megdnn::SleepForward>();
auto run = [&](float time) -> double {
opr->param() = {time};
......
......@@ -24,16 +24,17 @@ class ArgmxxRNG final: public RNG {
void gen(const TensorND &tensor) override {
auto offset = tensor.layout.span().low_elem;
auto nr_elems = tensor.layout.span().dist_elem();
#define cb(DType) \
if (tensor.layout.dtype == DType()) { \
using ctype = typename DTypeTrait<DType>::ctype; \
auto ptr = tensor.ptr<ctype>(); \
for (size_t i = 0; i < nr_elems; ++i) { \
ptr[offset+i] = i; \
} \
std::random_shuffle(ptr + offset, ptr + offset + nr_elems); \
return; \
}
#define cb(DType) \
if (tensor.layout.dtype == DType()) { \
using ctype = typename DTypeTrait<DType>::ctype; \
auto ptr = tensor.ptr<ctype>(); \
for (size_t i = 0; i < nr_elems; ++i) { \
ptr[offset + i] = i; \
} \
COMPAT_RANDOM(ptr + offset, ptr + offset + nr_elems); \
return; \
}
MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb);
#undef cb
megdnn_throw(megdnn_mangle(ssprintf("Unsupported DType: %s",
......
......@@ -76,7 +76,11 @@ add_custom_target(_version_ld SOURCES ${VERSION_SCRIPT})
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/pybind11 ${PROJECT_BINARY_DIR}/third_party/pybind11)
pybind11_add_module(${MODULE_NAME} NO_EXTRAS ${SRCS})
target_link_libraries(${MODULE_NAME} PRIVATE gen_op_def megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT})
# Link the module against gen_op_def/megbrain/megdnn. The symbol version script
# is passed with -Wl,--version-script only on the remaining (non Apple/MSVC/Windows)
# platforms -- presumably because the linkers used there do not accept that
# GNU-ld style option; confirm before changing the condition.
if (APPLE OR MSVC OR WIN32)
target_link_libraries(${MODULE_NAME} PRIVATE gen_op_def megbrain megdnn)
else()
target_link_libraries(${MODULE_NAME} PRIVATE gen_op_def megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT})
endif()
if (MGE_WITH_DISTRIBUTED)
message("Imperative configured to link megray")
target_link_libraries(${MODULE_NAME} PRIVATE megray)
......@@ -91,6 +95,10 @@ set_target_properties(${MODULE_NAME} PROPERTIES
SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX}
LIBRARY_OUTPUT_DIRECTORY ${MEGENGINE_DIR}/${PACKAGE_NAME}/core
)
if (APPLE OR MSVC OR WIN32)
message("-- overwriting SUFFIX at macos and windows before config by set_target_properties")
pybind11_extension(${MODULE_NAME})
endif()
add_dependencies(${MODULE_NAME} gen_opr_py _version_ld)
if(MGE_WITH_TEST AND MGE_ENABLE_RTTI)
......
......@@ -8,6 +8,67 @@
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import os
import sys
import platform
import ctypes
# Windows only: make the DLLs shipped in <package>/core/lib loadable before the
# extension module is imported, by registering the directory with the loader
# and then loading every *.dll found there explicitly.
if sys.platform == "win32":
lib_path = os.path.join(os.path.dirname(__file__), "core/lib")
dll_paths = list(filter(os.path.exists, [lib_path,]))
# the wheel is expected to always ship core/lib; fail early if it is missing
assert len(dll_paths) > 0
kernel32 = ctypes.WinDLL("kernel32.dll", use_last_error=True)
# AddDllDirectory is not exported by every kernel32 version; remember whether
# it is available before using it or LoadLibraryExW below
has_load_library_attr = hasattr(kernel32, "AddDllDirectory")
# 0x0001 is SEM_FAILCRITICALERRORS in the Windows headers; the previous mode is
# saved so it can be restored once loading is finished
old_error_mode = kernel32.SetErrorMode(0x0001)
# return handles as c_void_p so a NULL result is surfaced to Python as None
kernel32.LoadLibraryW.restype = ctypes.c_void_p
if has_load_library_attr:
kernel32.AddDllDirectory.restype = ctypes.c_void_p
kernel32.LoadLibraryExW.restype = ctypes.c_void_p
# register every existing directory: os.add_dll_directory on Python >= 3.8,
# otherwise kernel32.AddDllDirectory when available, otherwise only warn
for dll_path in dll_paths:
if sys.version_info >= (3, 8):
os.add_dll_directory(dll_path)
elif has_load_library_attr:
res = kernel32.AddDllDirectory(dll_path)
if res is None:
err = ctypes.WinError(ctypes.get_last_error())
err.strerror += ' Error adding "{}" to the DLL search PATH.'.format(
dll_path
)
raise err
else:
print("WARN: python or OS env have some issue, may load DLL failed!!!")
import glob
dlls = glob.glob(os.path.join(lib_path, "*.dll"))
# PATH is prepended with dll_paths at most once, and only if a fallback load is needed
path_patched = False
for dll in dlls:
is_loaded = False
if has_load_library_attr:
# 0x00001100 == LOAD_LIBRARY_SEARCH_DEFAULT_DIRS (0x1000) |
# LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR (0x0100) in the Windows headers
res = kernel32.LoadLibraryExW(dll, None, 0x00001100)
last_error = ctypes.get_last_error()
# error 126 is ERROR_MOD_NOT_FOUND: it is not raised here, the DLL is
# retried through the PATH-based LoadLibraryW fallback below
if res is None and last_error != 126:
err = ctypes.WinError(last_error)
err.strerror += ' Error loading "{}" or one of its dependencies.'.format(
dll
)
raise err
elif res is not None:
is_loaded = True
# fallback: put the DLL directories in front of PATH and use plain LoadLibraryW
if not is_loaded:
if not path_patched:
os.environ["PATH"] = ";".join(dll_paths + [os.environ["PATH"]])
path_patched = True
res = kernel32.LoadLibraryW(dll)
if res is None:
err = ctypes.WinError(ctypes.get_last_error())
err.strerror += ' Error loading "{}" or one of its dependencies.'.format(
dll
)
raise err
# restore the error mode saved before loading started
kernel32.SetErrorMode(old_error_mode)
from .core._imperative_rt.utils import _set_fork_exec_path_for_timed_func
from .device import *
......
......@@ -6,10 +6,14 @@
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import resource
import platform
import sys
import threading
# Windows do not imp resource package
if platform.system() != "Windows":
import resource
class AlternativeRecursionLimit:
r"""A reentrant context manager for setting global recursion limits.
......@@ -28,16 +32,24 @@ class AlternativeRecursionLimit:
with self.lock:
if self.count == 0:
self.orig_py_limit = sys.getrecursionlimit()
if platform.system() != "Windows":
(
self.orig_rlim_stack_soft,
self.orig_rlim_stack_hard,
) = resource.getrlimit(resource.RLIMIT_STACK)
resource.setrlimit(
resource.RLIMIT_STACK,
(self.orig_rlim_stack_hard, self.orig_rlim_stack_hard),
)
# increase recursion limit
sys.setrecursionlimit(self.new_py_limit)
# FIXME: https://bugs.python.org/issue34602, python3 release version
# on Macos always have this issue, not all user install python3 from src
try:
resource.setrlimit(
resource.RLIMIT_STACK,
(self.orig_rlim_stack_hard, self.orig_rlim_stack_hard),
)
except ValueError as exc:
if platform.system() != "Darwin":
raise exc
# increase recursion limit
sys.setrecursionlimit(self.new_py_limit)
self.count += 1
def __exit__(self, type, value, traceback):
......@@ -45,10 +57,16 @@ class AlternativeRecursionLimit:
self.count -= 1
if self.count == 0:
sys.setrecursionlimit(self.orig_py_limit)
resource.setrlimit(
resource.RLIMIT_STACK,
(self.orig_rlim_stack_soft, self.orig_rlim_stack_hard),
)
if platform.system() != "Windows":
try:
resource.setrlimit(
resource.RLIMIT_STACK,
(self.orig_rlim_stack_soft, self.orig_rlim_stack_hard),
)
except ValueError as exc:
if platform.system() != "Darwin":
raise exc
_max_recursion_limit_context_manager = AlternativeRecursionLimit(2 ** 31 - 1)
......
......@@ -9,6 +9,7 @@
import os
import re
import pathlib
import platform
from distutils.file_util import copy_file
from setuptools import setup, find_packages, Extension
from setuptools.command.build_ext import build_ext as _build_ext
......@@ -29,7 +30,10 @@ class build_ext(_build_ext):
extdir.parent.mkdir(parents=True, exist_ok=True)
modpath = self.get_ext_fullname(ext.name).split('.')
modpath[-1] += '.so'
if platform.system() == 'Windows':
modpath[-1] += '.pyd'
else:
modpath[-1] += '.so'
modpath = str(pathlib.Path(*modpath).resolve())
copy_file(modpath, fullpath, verbose=self.verbose, dry_run=self.dry_run)
......@@ -47,6 +51,14 @@ if local_version:
__version__ = '{}+{}'.format(__version__, local_version)
packages = find_packages(exclude=['test'])
# Every path found below megengine/core/include and then megengine/core/lib,
# expressed relative to the megengine package directory, is shipped as package data.
package_data = [
    str(found.relative_to('megengine'))
    for subdir in ('include', 'lib')
    for found in pathlib.Path('megengine', 'core', subdir).glob('**/*')
]
with open('requires.txt') as f:
requires = f.read().splitlines()
......@@ -63,6 +75,9 @@ setup_kwargs = dict(
author='Megvii Engine Team',
author_email=email,
packages=packages,
package_data={
'megengine': package_data,
},
ext_modules=[PrecompiledExtesion('megengine.core._imperative_rt')],
install_requires=requires,
extras_require={
......
......@@ -9,15 +9,6 @@
#include "megbrain/utils/mempool.h"
#include "./numpy_dtypes.h"
/*
* demangle typeid, see
* http://stackoverflow.com/questions/281818/unmangling-the-result-of-stdtype-infoname
*/
#ifdef __GNUG__
#include <cstdlib>
#include <memory>
#include <cxxabi.h>
namespace py = pybind11;
PyTaskDipatcher py_task_q = {};
......@@ -34,10 +25,18 @@ py::module rel_import(py::str name, py::module m, int level) {
return import(name, m.attr("__dict__"), py::arg("level")=level);
}
/*
* demangle typeid, see
* http://stackoverflow.com/questions/281818/unmangling-the-result-of-stdtype-infoname
*/
#ifdef __GNUG__
#include <cxxabi.h>
#include <cstdlib>
#include <memory>
namespace {
std::string demangle_typeid(const char* name) {
int status = -4; // some arbitrary value to eliminate the compiler warning
// enable c++11 by passing the flag -std=c++11 to g++
......@@ -48,7 +47,7 @@ std::string demangle_typeid(const char* name) {
return (status==0) ? res.get() : name ;
}
}
} // namespace
#else
namespace {
......
#include "utils.h"
#ifdef WIN32
#include <stdio.h>
#include <windows.h>
#endif
#include <pybind11/operators.h>
#include <atomic>
......
......@@ -8,6 +8,7 @@
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import multiprocessing as mp
import os
import platform
import re
import subprocess
import sys
......@@ -196,6 +197,9 @@ def run_test(
@pytest.mark.isolated_distributed
@pytest.mark.skipif(
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
)
def test_dp_correctness():
model_name = "mnist_model_with_test.mge"
model_path = os.path.join(os.path.dirname(__file__), model_name)
......
......@@ -35,7 +35,7 @@ from megengine.functional.distributed import (
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.skipif(
platform.system() == "Windows", reason="do not imp GPU mode at Windows now"
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
)
@pytest.mark.isolated_distributed
def test_reduce_sum():
......@@ -77,7 +77,7 @@ def test_reduce_sum():
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.skipif(
platform.system() == "Windows", reason="do not imp GPU mode at Windows now"
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
)
@pytest.mark.isolated_distributed
def test_broadcast():
......@@ -115,7 +115,7 @@ def test_broadcast():
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.skipif(
platform.system() == "Windows", reason="do not imp GPU mode at Windows now"
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
)
@pytest.mark.isolated_distributed
def test_all_gather():
......@@ -154,7 +154,7 @@ def test_all_gather():
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.skipif(
platform.system() == "Windows", reason="do not imp GPU mode at Windows now"
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
)
@pytest.mark.isolated_distributed
def test_reduce_scatter_sum():
......@@ -193,7 +193,7 @@ def test_reduce_scatter_sum():
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.skipif(
platform.system() == "Windows", reason="do not imp GPU mode at Windows now"
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
)
@pytest.mark.isolated_distributed
def test_all_reduce_sum():
......@@ -232,7 +232,7 @@ def test_all_reduce_sum():
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.skipif(
platform.system() == "Windows", reason="do not imp GPU mode at Windows now"
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
)
@pytest.mark.isolated_distributed
def test_all_reduce_max():
......@@ -271,7 +271,7 @@ def test_all_reduce_max():
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.skipif(
platform.system() == "Windows", reason="do not imp GPU mode at Windows now"
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
)
@pytest.mark.isolated_distributed
def test_all_reduce_min():
......@@ -310,7 +310,7 @@ def test_all_reduce_min():
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.skipif(
platform.system() == "Windows", reason="do not imp GPU mode at Windows now"
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
)
@pytest.mark.isolated_distributed
def test_gather():
......@@ -352,7 +352,7 @@ def test_gather():
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.skipif(
platform.system() == "Windows", reason="do not imp GPU mode at Windows now"
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
)
@pytest.mark.isolated_distributed
def test_scatter():
......@@ -390,7 +390,7 @@ def test_scatter():
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.skipif(
platform.system() == "Windows", reason="do not imp GPU mode at Windows now"
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
)
@pytest.mark.isolated_distributed
def test_all_to_all():
......@@ -430,7 +430,7 @@ def test_all_to_all():
platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.skipif(
platform.system() == "Windows", reason="do not imp GPU mode at Windows now"
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
)
@pytest.mark.isolated_distributed
def test_io_remote():
......
......@@ -6,6 +6,7 @@
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import platform
import weakref
import numpy as np
......@@ -51,6 +52,9 @@ def save_to(self, name="grad"):
@pytest.mark.isolated_distributed
@pytest.mark.skipif(
platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
)
def test_dist_grad():
world_size = 2
x_np = np.random.rand(10).astype("float32")
......
......@@ -9,7 +9,17 @@
#include "megbrain/imperative/profiler.h"
#if defined(_MSC_VER) || defined(WIN32)
#include <windows.h>
#define getpid GetCurrentProcessId
#else
#include <sys/unistd.h>
#endif
#if defined(__APPLE__) || defined(__MACOSX)
#include <unistd.h>
#endif
#include <variant>
#include "megbrain/imperative/ops/opr_attr.h"
......
......@@ -16,6 +16,10 @@
#include "megbrain/imperative/ops/opr_attr.h"
#include "megbrain/imperative/ops/backward_graph.h"
#if __cplusplus >= 201703L
#include <optional>
#endif
namespace mgb {
namespace imperative {
......
......@@ -38,8 +38,11 @@ if(CXX_SUPPORT_WCLASS_MEMACCESS)
endif()
if(UNIX)
target_link_libraries(imperative_test dl rt)
if(APPLE OR ANDROID)
target_link_libraries(imperative_test dl)
else()
target_link_libraries(imperative_test dl rt)
endif()
endif()
install(TARGETS imperative_test RUNTIME DESTINATION test)
......@@ -81,7 +81,10 @@ else()
target_link_libraries(mgb megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT})
endif()
target_include_directories(mgb PRIVATE ${PYTHON_INCLUDE_DIRS} src/cpp ${CMAKE_CURRENT_BINARY_DIR} ${NUMPY_INCLUDE_DIR})
target_link_libraries(mgb ${PYTHON_LIBRARIES})
# only windows need link PYTHON_LIBRARIES
if(MSVC OR WIN32)
target_link_libraries(mgb ${PYTHON_LIBRARIES})
endif()
if (MGE_WITH_DISTRIBUTED)
target_link_libraries(mgb megray)
......
......@@ -30,11 +30,17 @@
4e: add C:\Program Files\NVIDIA GPU Computing Toolkit\cudnn-10.1-windows10-x64-v7.6.5.32\cuda\bin to system Path env
4f: add C:\Program Files\NVIDIA GPU Computing Toolkit\TensorRT-6.0.1.5\lib Path
if u do not do 4d/4e/4f, CUDA runtime can not find dll
5: install python3 (default version 3.8.3) to /c/Users/${USER}/mge_whl_python_env/3.8.3,
add it to the PATH env and run python3 -m pip install numpy (needed if you want to build with training mode or build the python whl)
6: install swig from install gui (if u want to build with training mode or build python whl)
a: download swig: https://nchc.dl.sourceforge.net/project/swig/swigwin/swigwin-4.0.2/swigwin-4.0.2.zip
b: install swig to /c/Users/${USER}/swigwin-4.0.2
c: apply scripts/whl/windows/fix-ptr-define-issue.patch to c/Users/${USER}/swigwin-4.0.2
```
### linux host build
```
1: cmake, which version > 3.14.4
2: gcc/g++, which version > 6
2: gcc/g++, which version > 6, (gcc/g++ >= 7, if need build training)
3: install build-essential git git-lfs gfortran libgfortran-6-dev autoconf gnupg flex bison gperf curl
4: zlib1g-dev gcc-multilib g++-multilib lib32ncurses5-dev libxml2-utils xsltproc unzip libtool:
5: librdmacm-dev rdmacm-utils python3-dev swig python3-numpy texinfo
......@@ -47,6 +53,7 @@
3: brew install python python3 swig coreutils
4: install at least xcode command line tool: https://developer.apple.com/xcode/
5: about cuda: we do not support CUDA on macos
6: python3 -m pip install numpy (if u want to build with training mode or build python whl)
```
### cross build for arm-android
now we support windows/linux/macos cross build to arm-android
......
......@@ -9,6 +9,7 @@ function usage() {
echo "-t : Build with training mode, default inference only"
echo "-m : Build with m32 mode(only for windows build), default m64"
echo "-r : remove old build dir before make, default off"
echo "-n : enable new python runtime(valid when training mode with -t, default is legacy runtime)"
echo "-h : show usage"
echo "append other cmake config by export EXTRA_CMAKE_ARGS=..."
echo "example: $0 -d"
......@@ -22,9 +23,10 @@ MGE_WINDOWS_BUILD_ARCH=x64
MGE_WINDOWS_BUILD_MARCH=m64
MGE_ARCH=x86_64
REMOVE_OLD_BUILD=false
MGE_BUILD_IMPERATIVE_RT=OFF
echo "EXTRA_CMAKE_ARGS: ${EXTRA_CMAKE_ARGS}"
while getopts "rhdctm" arg
while getopts "rhdctmn" arg
do
case $arg in
d)
......@@ -48,11 +50,15 @@ do
REMOVE_OLD_BUILD=true
;;
m)
echo "build for m32(only use for windows)"
echo "build for m32(only valid use for windows)"
MGE_WINDOWS_BUILD_ARCH=x86
MGE_WINDOWS_BUILD_MARCH=m32
MGE_ARCH=i386
;;
n)
echo "Enable imperative python wrapper runtime"
MGE_BUILD_IMPERATIVE_RT=ON
;;
?)
echo "unkonw argument"
usage
......@@ -101,6 +107,7 @@ function cmake_build() {
cmake \
-DCMAKE_BUILD_TYPE=$BUILD_TYPE \
-DMGE_INFERENCE_ONLY=$MGE_INFERENCE_ONLY \
-DMGE_BUILD_IMPERATIVE_RT=${MGE_BUILD_IMPERATIVE_RT} \
-DMGE_WITH_CUDA=$MGE_WITH_CUDA \
-DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \
${EXTRA_CMAKE_ARGS} \
......@@ -112,7 +119,7 @@ function cmake_build() {
function windows_env_err() {
echo "check windows env failed!!"
echo "please install LLVM/clang-cl/cmake/python at Visual Studio Extensions"
echo "please install env refs for: scripts/cmake-build/BUILD_README.md"
exit -1
}
......@@ -178,6 +185,25 @@ function prepare_env_for_windows_build() {
export CPATH=$CPATH:$NIVIDA_INSTALL_PRE/${TRT_V}/include:$NIVIDA_INSTALL_PRE/CUDA/${CUDA_V}/include:$NIVIDA_INSTALL_PRE/CUDA/${CUDA_V}/include/nvtx3:$PC_CUDNN_INCLUDE_DIRS
export LIBRARY_PATH=$LIBRARY_PATH:$LD_LIBRARY_PATH
export INCLUDE=$INCLUDE:$CPATH
# The python version is normally configured by the whl build script or the CI
# script (signalled through ALREADY_CONFIG_PYTHON_VER). When host_build.sh is
# called on its own, fall back to a default python3 so the build can succeed.
if [[ -z ${ALREADY_CONFIG_PYTHON_VER} ]]
then
    echo "config a default python3"
    DFT_PYTHON_BIN=/c/Users/${USER}/mge_whl_python_env/3.8.3
    if [ ! -f "${DFT_PYTHON_BIN}/python3.exe" ]; then
        echo "ERR: can not find ${DFT_PYTHON_BIN}/python3.exe , Invalid env"
        windows_env_err
    else
        echo "put python3 to env..."
        export PATH=${DFT_PYTHON_BIN}:$PATH
        which python3
    fi
fi

echo "export swig pwd to PATH"
# Use a single ':' separator: '::' would insert an empty PATH entry, which the
# shell interprets as the current working directory.
export PATH=/c/Users/${USER}/swigwin-4.0.2:$PATH
}
WINDOWS_BUILD_TARGET="Ninja all > build.log"
......@@ -218,6 +244,7 @@ function cmake_build_windows() {
vcvarsall.bat $MGE_WINDOWS_BUILD_ARCH && cmake -G "Ninja" \
-DMGE_ARCH=$MGE_ARCH \
-DMGE_INFERENCE_ONLY=$MGE_INFERENCE_ONLY \
-DMGE_BUILD_IMPERATIVE_RT=${MGE_BUILD_IMPERATIVE_RT} \
-DMGE_WITH_CUDA=$MGE_WITH_CUDA \
-DCMAKE_BUILD_TYPE=$BUILD_TYPE \
-DCMAKE_INSTALL_PREFIX:PATH=$INSTALL_DIR \
......@@ -230,8 +257,18 @@ function cmake_build_windows() {
${WINDOWS_BUILD_TARGET}"
}
if [ ${MGE_BUILD_IMPERATIVE_RT} = "ON" ] && [ ${MGE_INFERENCE_ONLY} = "ON" ]; then
echo "ERR: MGE_BUILD_IMPERATIVE_RT(-n) only valid when enable training mode(-t)"
echo "pls remove -n or add -t"
exit -1
fi
if [[ $OS =~ "NT" ]]; then
if [ ${MGE_ARCH} = "i386" ] && [ ${MGE_INFERENCE_ONLY} = "OFF" ]; then
echo "ERR: training mode(-t) only support 64 bit mode"
echo "pls remove -t or remove -m"
exit -1
fi
config_windows_build_target
cmake_build_windows $MGE_WITH_CUDA $MGE_INFERENCE_ONLY $BUILD_TYPE
else
......
......@@ -53,10 +53,6 @@
d0: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install --upgrade pip
d1: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install -r python_module/requires-test.txt
d2: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install numpy wheel requests tqdm tabulate
5: install swig from install gui
a: download swig: https://nchc.dl.sourceforge.net/project/swig/swigwin/swigwin-4.0.2/swigwin-4.0.2.zip
b: install swig to /c/Users/${USER}/swigwin-4.0.2
c: apply scripts/whl/windows/fix-ptr-define-issue.patch to c/Users/${USER}/swigwin-4.0.2
```
# how to build
......@@ -90,6 +86,11 @@
```
ALL_PYTHON=3.5.9 ./scripts/whl/macos/macos_build_whl.sh
```
If you want to build with imperative rt, set env BUILD_IMPERATIVE="ON", eg:
```
ALL_PYTHON=3.5.9 BUILD_IMPERATIVE="ON" ./scripts/whl/macos/macos_build_whl.sh
```
## build for windows
```
./scripts/whl/windows/windows_build_whl.sh
......@@ -102,5 +103,7 @@
If you want to build the Windows whl with CUDA and for a specific Python version, e.g.:
```
WINDOWS_WHL_WITH_CUDA="true" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh
WINDOWS_WHL_WITH_CUDA="ON" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh
```
If you want to build with the imperative runtime, set the env variable BUILD_IMPERATIVE="ON", e.g.:
```
BUILD_IMPERATIVE="ON" WINDOWS_WHL_WITH_CUDA="ON" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh
```
......@@ -65,16 +65,18 @@ function config_python_env() {
fi
echo ${ver}
#config a dir to trick cmake find a null pythonlib
PYTHON_LIBRARY=${PYTHON_DIR}lib/
if [ "$1" = "3.5.9" ]; then
PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.5m
PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.5m.dylib
elif [ "$1" = "3.6.10" ]; then
PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.6m
PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.6m.dylib
elif [ "$1" = "3.7.7" ]; then
PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.7m
PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.7m.dylib
elif [ "$1" = "3.8.3" ]; then
PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.8
PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.8.dylib
else
echo "ERR: DO NOT SUPPORT PYTHON VERSION"
echo "now support list: ${FULL_PYTHON_VER}"
......@@ -82,6 +84,11 @@ function config_python_env() {
fi
}
# Fall back to "OFF" when the caller left BUILD_IMPERATIVE unset or empty.
: "${BUILD_IMPERATIVE:=OFF}"
function do_build() {
for ver in ${ALL_PYTHON}
do
......@@ -89,7 +96,7 @@ function do_build() {
config_python_env ${ver}
#check env
if [ ! -d "$PYTHON_LIBRARY" ]; then
if [ ! -f "$PYTHON_LIBRARY" ]; then
echo "ERR: can not find $PYTHON_LIBRARY , Invalid python package"
err_env
fi
......@@ -102,14 +109,20 @@ function do_build() {
#append cmake args for config python
export EXTRA_CMAKE_ARGS="-DCMAKE_PREFIX_PATH=${PYTHON_DIR} -DPYTHON_LIBRARY=${PYTHON_LIBRARY} -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR} "
#config build type to RelWithDebInfo to enable MGB_ENABLE_DEBUG_UTIL etc
export EXTRA_CMAKE_ARGS=${EXTRA_CMAKE_ARGS}" -DCMAKE_BUILD_TYPE=RelWithDebInfo "
export EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCMAKE_BUILD_TYPE=RelWithDebInfo "
#call build and install
#FIXME: cmake do not triger update python config, after
#change PYTHON_LIBRARY and PYTHON_INCLUDE_DIR, so add
#-r to remove build cache after a new ver build, which
#will be more slow build than without -r
${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r
if [ ${BUILD_IMPERATIVE} = "ON" ]; then
echo "build whl with IMPERATIVE python rt"
${SRC_DIR}/scripts/cmake-build/host_build.sh -t -n -r
else
echo "build whl with legacy python rt"
${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r
fi
#call setup.py
BUILD_DIR=${SRC_DIR}/build_dir/host/MGE_WITH_CUDA_OFF/MGE_INFERENCE_ONLY_OFF/Release/build/
......@@ -121,12 +134,47 @@ function do_build() {
fi
mkdir -p staging
if [ ${BUILD_IMPERATIVE} = "ON" ]; then
echo "build whl with IMPERATIVE python rt"
cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/
cd ${BUILD_DIR}/staging/megengine/core
rt_file=`ls _imperative_rt.*.so`
echo "rt file is: ${rt_file}"
if [[ -z ${rt_file} ]]
then
echo "ERR: can not find valid rt file"
exit -1
fi
llvm-strip -s ${rt_file}
mv ${rt_file} _imperative_rt.so
echo "check so valid or not..."
otool_out=`otool -L _imperative_rt.so`
if [[ "${otool_out}" =~ "ython" ]]; then
echo "ERR: invalid _imperative_rt.so which depend on python lib, detail: log"
echo ${otool_out}
exit -1
else
echo "valid..."
fi
else
echo "build whl with legacy python rt"
cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/
cd ${BUILD_DIR}/staging/megengine/_internal
#FIXME: set lib suffix to dylib may be better, BUT we find after distutils.file_util.copy_file
#will change to .so at macos even we set suffix to dylib, at the same time, macos also support .so
echo "check so valid or not..."
llvm-strip -s _mgb.so
otool_out=`otool -L _mgb.so`
if [[ "${otool_out}" =~ "ython" ]]; then
echo "ERR: invalid _mgb.so which depend on python lib, detail: log"
echo ${otool_out}
exit -1
else
echo "valid..."
fi
fi
cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/
cd ${BUILD_DIR}/staging/megengine/_internal
#FIXME: set lib suffix to dylib may be better, BUT we find after distutils.file_util.copy_file
#will change to .so at macos even we set suffix to dylib, at the same time, macos also support .so
llvm-strip -s _mgb.so
cd ${BUILD_DIR}/staging
${PYTHON_DIR}/bin/python3 setup.py bdist_wheel
cd ${BUILD_DIR}/staging/dist/
......
......@@ -14,8 +14,6 @@ function err_env() {
}
function append_path_env_and_check() {
echo "export swig pwd to PATH"
export PATH=/c/Users/${USER}/swigwin-4.0.2::$PATH
echo "export vs2019 install path"
export VS_PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2019/Enterprise
# for llvm-strip
......@@ -62,7 +60,7 @@ function config_python_env() {
if [[ -z ${WINDOWS_WHL_WITH_CUDA} ]]
then
WINDOWS_WHL_WITH_CUDA="false"
WINDOWS_WHL_WITH_CUDA="OFF"
fi
......@@ -74,26 +72,46 @@ CUBLAS_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublas6
CURAND_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/curand64_10.dll"
CUBLASLT_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublasLt64_10.dll"
CUDART_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cudart64_101.dll"
# Copy the NVIDIA runtime libraries referenced by the *_LIB variables
# (TRT_LIB, CUDNN_LIB, CUSOLVER_LIB, CUBLAS_LIB, CURAND_LIB, CUBLASLT_LIB,
# CUDART_LIB) into a destination directory.
#   $1: destination passed to cp (expected to be an existing directory)
# REAL_DST is left as a global, exactly as before.
function depend_real_copy() {
    REAL_DST=$1
    echo "real copy lib to $1"
    local dep_lib
    # Quote both source and destination so paths containing spaces are not word-split.
    for dep_lib in "${TRT_LIB}" "${CUDNN_LIB}" "${CUSOLVER_LIB}" "${CUBLAS_LIB}" \
        "${CURAND_LIB}" "${CUBLASLT_LIB}" "${CUDART_LIB}"; do
        cp "${dep_lib}" "${REAL_DST}"
    done
}
# Prepare the lib directory that is packaged into the python wheel and, for
# CUDA builds, place the NVIDIA runtime libraries in it.
# Reads BUILD_DIR, BUILD_IMPERATIVE and WINDOWS_WHL_WITH_CUDA from the
# surrounding script.
function copy_more_dll() {
# destination used by the actual python wheel
CP_DST=${BUILD_DIR}/staging/megengine/_internal/lib
rm -rf ${CP_DST}
mkdir ${CP_DST}
# The lib directory depends on the runtime flavor: megengine/core/lib for the
# imperative runtime, megengine/_internal/lib for the legacy one.
if [ ${BUILD_IMPERATIVE} = "ON" ]; then
echo "config BUILD_IMPERATIVE core lib dir"
CP_WHL_DST=${BUILD_DIR}/staging/megengine/core/lib
else
echo "config legacy python lib dir"
CP_WHL_DST=${BUILD_DIR}/staging/megengine/_internal/lib
fi
# Recreate the destination directory from scratch.
rm -rf ${CP_WHL_DST}
mkdir ${CP_WHL_DST}
# Workaround for the cpu-only wheel failing to import: write a placeholder
# empty.file so that setup.py is triggered to create the otherwise empty lib dir.
echo "empty" > ${CP_WHL_DST}/empty.file
# The NVIDIA libraries are only copied for CUDA-enabled builds.
if [ ${WINDOWS_WHL_WITH_CUDA} = "true" ]; then
if [ ${WINDOWS_WHL_WITH_CUDA} = "ON" ]; then
echo "copy nvidia lib to whl use...."
cp "${TRT_LIB}" ${CP_DST}
cp "${CUDNN_LIB}" ${CP_DST}
cp "${CUSOLVER_LIB}" ${CP_DST}
cp "${CUBLAS_LIB}" ${CP_DST}
cp "${CURAND_LIB}" ${CP_DST}
cp "${CUBLASLT_LIB}" ${CP_DST}
cp "${CUDART_LIB}" ${CP_DST}
depend_real_copy ${CP_WHL_DST}
fi
}
# Default to the legacy python runtime ("OFF") when BUILD_IMPERATIVE is unset
# or empty; a caller-provided value is left untouched.
: "${BUILD_IMPERATIVE:=OFF}"
function do_build() {
for ver in ${ALL_PYTHON}
do
......@@ -118,21 +136,31 @@ function do_build() {
#force LINK a real PYTHON_LIBRARY file, after test we do not find the symbols conflict with python
#export EXTRA_CMAKE_ARGS="-DPYTHON_LIBRARY=${PYTHON_LIBRARY} -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR} "
#config build type to RelWithDebInfo to enable MGB_ENABLE_DEBUG_UTIL etc
export EXTRA_CMAKE_ARGS=${EXTRA_CMAKE_ARGS}" -DCMAKE_BUILD_TYPE=RelWithDebInfo "
export EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCMAKE_BUILD_TYPE=RelWithDebInfo "
#call build and install
#FIXME: cmake does not refresh its cached python config after
#PYTHON_LIBRARY and PYTHON_INCLUDE_DIR change, so pass -r to
#remove the build cache before building each new python version;
#this makes the build slower than it would be without -r
if [ ${WINDOWS_WHL_WITH_CUDA} = "true" ]; then
BUILD_ARGS=" -t -r"
if [ ${BUILD_IMPERATIVE} = "ON" ]; then
echo "build whl with IMPERATIVE python rt"
BUILD_ARGS="${BUILD_ARGS} -n "
else
echo "build whl with legacy python rt"
fi
if [ ${WINDOWS_WHL_WITH_CUDA} = "ON" ]; then
echo "build windows whl with cuda"
${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r -c
BUILD_ARGS="${BUILD_ARGS} -c "
else
echo "build windows whl with cpu only"
${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r
fi
echo "host_build.sh BUILD_ARGS: ${BUILD_ARGS}"
${SRC_DIR}/scripts/cmake-build/host_build.sh ${BUILD_ARGS}
#call setup.py
BUILD_DIR=${SRC_DIR}/build_dir/host/build/
cd ${BUILD_DIR}
......@@ -143,10 +171,27 @@ function do_build() {
fi
mkdir -p staging
if [ ${BUILD_IMPERATIVE} = "ON" ]; then
echo "build whl with IMPERATIVE python rt"
cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/
cd ${BUILD_DIR}/staging/megengine/core
rt_file=`ls _imperative_rt.*.pyd`
echo "rt file is: ${rt_file}"
if [[ -z ${rt_file} ]]
then
echo "ERR: can not find valid rt file"
exit -1
fi
llvm-strip -s ${rt_file}
mv ${rt_file} _imperative_rt.pyd
else
echo "build whl with legacy python rt"
cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/
cd ${BUILD_DIR}/staging/megengine/_internal
llvm-strip -s _mgb.pyd
fi
cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/
cd ${BUILD_DIR}/staging/megengine/_internal
llvm-strip -s _mgb.pyd
copy_more_dll
cd ${BUILD_DIR}/staging
${PYTHON_DIR}/python3 setup.py bdist_wheel
......@@ -175,5 +220,6 @@ function third_party_prepare() {
}
######################
export ALREADY_CONFIG_PYTHON_VER="yes"
third_party_prepare
do_build
......@@ -33,6 +33,11 @@ class RNGxorshf {
uint64_t s[2];
public:
#if __cplusplus >= 201703L
typedef uint64_t result_type;
static constexpr uint64_t min() { return 0; }
static constexpr uint64_t max() { return UINT64_MAX; }
#endif
RNGxorshf(uint64_t seed) {
std::mt19937_64 gen(seed);
s[0] = gen();
......@@ -936,8 +941,12 @@ void SeqModifierForSublinearMemory::ActionSearcherSingleCN::search_genetic() {
}
}
m_cur_records = records;
#if __cplusplus >= 201703L
std::shuffle(perm.begin(), perm.end(), rng);
#else
std::random_shuffle(perm.begin(), perm.end(),
[&](size_t x) { return rng() % x; });
#endif
for (size_t i = 0; i < length; ++i) {
invoke_search(mutation(mutation(records[i].first)));
invoke_search(crossover(records[i].first, records[perm[i]].first));
......
......@@ -705,7 +705,12 @@ TEST(TestOprBlas, MatrixInverse) {
}
auto ptr = inp[0]->ptr<float>();
for (size_t i = 0; i < batch; ++i, ptr += n * n) {
#if __cplusplus >= 201703L
std::default_random_engine rng_engine;
std::shuffle(perm.begin(), perm.end(), rng_engine);
#else
std::random_shuffle(perm.begin(), perm.end());
#endif
for (size_t j = 0; j < n; ++j) {
ptr[j * n + perm[j]] += 5;
}
......
......@@ -36,7 +36,12 @@ void run_all_gather(const std::vector<size_t>& axis_size, bool& success,
sleep_time.push_back(i * 0.05 + 0.1);
tot_axis_size += axis_size[i];
}
#if __cplusplus >= 201703L
std::default_random_engine rng_engine;
std::shuffle(sleep_time.begin(), sleep_time.end(), rng_engine);
#else
std::random_shuffle(sleep_time.begin(), sleep_time.end());
#endif
auto constexpr DEVICE_TYPE = CompNode::DeviceType::CUDA;
size_t nr_dev = std::min<size_t>(
......
......@@ -18,7 +18,11 @@ endif()
add_executable(megbrain_test ${SOURCES})
target_link_libraries(megbrain_test gtest)
target_link_libraries(megbrain_test megengine)
if(MSVC OR WIN32)
target_link_libraries(megbrain_test megbrain megdnn)
else()
target_link_libraries(megbrain_test megengine)
endif()
if(CXX_SUPPORT_WCLASS_MEMACCESS)
if(MGE_WITH_CUDA)
target_compile_options(megbrain_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-class-memaccess>"
......@@ -28,10 +32,12 @@ if(CXX_SUPPORT_WCLASS_MEMACCESS)
endif()
endif()
if(APPLE OR ANDROID)
target_link_libraries(megbrain_test dl)
else()
target_link_libraries(megbrain_test dl rt)
if(UNIX)
if(APPLE OR ANDROID)
target_link_libraries(megbrain_test dl)
else()
target_link_libraries(megbrain_test dl rt)
endif()
endif()
if (MGE_WITH_DISTRIBUTED)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册