Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
6e882c1a
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
404
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
6e882c1a
编写于
8月 24, 2020
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(whl/imperative): compat for build python whl imperative and legacy runtime
GitOrigin-RevId: 7f6629ae1f84b4aec3a4211f22b1d8d18d36a1b7
上级
40d18c89
变更
33
显示空白变更内容
内联
并排
Showing
33 changed file
with
439 addition
and
119 deletion
+439
-119
CMakeLists.txt
CMakeLists.txt
+2
-0
dnn/src/common/utils.h
dnn/src/common/utils.h
+4
-0
dnn/test/CMakeLists.txt
dnn/test/CMakeLists.txt
+6
-4
dnn/test/common/mesh_indexing.h
dnn/test/common/mesh_indexing.h
+1
-1
dnn/test/common/rng.cpp
dnn/test/common/rng.cpp
+2
-2
dnn/test/common/rng.h
dnn/test/common/rng.h
+10
-0
dnn/test/cuda/argmxx.cpp
dnn/test/cuda/argmxx.cpp
+10
-9
dnn/test/cuda/argsort.cpp
dnn/test/cuda/argsort.cpp
+2
-2
dnn/test/cuda/relayout.cpp
dnn/test/cuda/relayout.cpp
+8
-7
dnn/test/cuda/sleep.cpp
dnn/test/cuda/sleep.cpp
+1
-1
dnn/test/rocm/argmxx.cpp
dnn/test/rocm/argmxx.cpp
+11
-10
imperative/CMakeLists.txt
imperative/CMakeLists.txt
+9
-1
imperative/python/megengine/__init__.py
imperative/python/megengine/__init__.py
+61
-0
imperative/python/megengine/utils/max_recursion_limit.py
imperative/python/megengine/utils/max_recursion_limit.py
+29
-11
imperative/python/setup.py
imperative/python/setup.py
+16
-1
imperative/python/src/helper.cpp
imperative/python/src/helper.cpp
+10
-11
imperative/python/src/utils.cpp
imperative/python/src/utils.cpp
+4
-0
imperative/python/test/integration/test_dp_correctness.py
imperative/python/test/integration/test_dp_correctness.py
+4
-0
imperative/python/test/unit/functional/test_distributed.py
imperative/python/test/unit/functional/test_distributed.py
+11
-11
imperative/python/test/unit/test_autodiff.py
imperative/python/test/unit/test_autodiff.py
+4
-0
imperative/src/impl/profiler.cpp
imperative/src/impl/profiler.cpp
+10
-0
imperative/src/impl/proxy_graph.cpp
imperative/src/impl/proxy_graph.cpp
+4
-0
imperative/test/CMakeLists.txt
imperative/test/CMakeLists.txt
+5
-2
python_module/CMakeLists.txt
python_module/CMakeLists.txt
+4
-1
scripts/cmake-build/BUILD_README.md
scripts/cmake-build/BUILD_README.md
+8
-1
scripts/cmake-build/host_build.sh
scripts/cmake-build/host_build.sh
+40
-3
scripts/whl/BUILD_PYTHON_WHL_README.md
scripts/whl/BUILD_PYTHON_WHL_README.md
+8
-5
scripts/whl/macos/macos_build_whl.sh
scripts/whl/macos/macos_build_whl.sh
+58
-10
scripts/whl/windows/windows_build_whl.sh
scripts/whl/windows/windows_build_whl.sh
+67
-21
src/core/impl/graph/seq_sublinear_memory.cpp
src/core/impl/graph/seq_sublinear_memory.cpp
+9
-0
src/opr/test/blas.cpp
src/opr/test/blas.cpp
+5
-0
src/opr/test/muxing.cpp
src/opr/test/muxing.cpp
+5
-0
test/CMakeLists.txt
test/CMakeLists.txt
+11
-5
未找到文件。
CMakeLists.txt
浏览文件 @
6e882c1a
...
...
@@ -697,8 +697,10 @@ endif()
if
(
MGE_WITH_PYTHON_MODULE
)
if
(
MGE_BUILD_IMPERATIVE_RT
)
add_subdirectory
(
imperative
)
message
(
"-- Enable imperative python wrapper runtime"
)
else
()
add_subdirectory
(
python_module
)
message
(
"-- Enable legacy python wrapper runtime"
)
endif
()
endif
()
...
...
dnn/src/common/utils.h
浏览文件 @
6e882c1a
...
...
@@ -342,7 +342,11 @@ template <typename T>
struct
SafeMultiplies
;
template
<
typename
T
>
#if __cplusplus >= 201703L
struct
_SafeMultipliesImplUnsigned
{
#else
struct
_SafeMultipliesImplUnsigned
:
public
std
::
binary_function
<
T
,
T
,
T
>
{
#endif
static
MEGDNN_CONSTEXPR
size_t
nbits
=
sizeof
(
T
)
*
8
;
static
size_t
clz
(
unsigned
x
)
{
...
...
dnn/test/CMakeLists.txt
浏览文件 @
6e882c1a
...
...
@@ -70,8 +70,10 @@ if (MEG_WITH_ROCM)
target_link_libraries
(
megdnn_test
${
MGE_ROCM_LIBS
}
)
endif
()
if
(
APPLE OR ANDROID
)
if
(
UNIX
)
if
(
APPLE OR ANDROID
)
target_link_libraries
(
megdnn_test dl
)
else
()
else
()
target_link_libraries
(
megdnn_test dl rt
)
endif
()
endif
()
dnn/test/common/mesh_indexing.h
浏览文件 @
6e882c1a
...
...
@@ -89,7 +89,7 @@ public:
auto
ptr
=
tensor
.
ptr
<
int
>
();
for
(
size_t
n
=
0
;
n
<
size
;
++
n
)
{
std
::
set
<
int
>
used
;
std
::
random_shuffle
(
seq
.
begin
(),
seq
.
end
());
COMPAT_RANDOM
(
seq
.
begin
(),
seq
.
end
());
for
(
size_t
step
=
0
;
step
<
stride
;
++
step
)
{
megdnn_assert
(
used
.
size
()
<
m_size
);
ptr
[
n
*
stride
+
step
]
=
seq
[
step
];
...
...
dnn/test/common/rng.cpp
浏览文件 @
6e882c1a
...
...
@@ -75,7 +75,7 @@ Float16PeriodicalRNG::Float16PeriodicalRNG() : m_offset(0) {
i2f
.
i
=
static_cast
<
uint16_t
>
(
x
);
m_sequence
.
push_back
(
i2f
.
f
);
}
std
::
random_shuffle
(
m_sequence
.
begin
(),
m_sequence
.
end
());
COMPAT_RANDOM
(
m_sequence
.
begin
(),
m_sequence
.
end
());
}
Float16PeriodicalRNG
::
Float16PeriodicalRNG
(
size_t
range
)
:
m_offset
(
0
)
{
...
...
@@ -99,7 +99,7 @@ Float16PeriodicalRNG::Float16PeriodicalRNG(size_t range) : m_offset(0) {
m_sequence
.
push_back
(
i2f
.
f
);
}
std
::
random_shuffle
(
m_sequence
.
begin
(),
m_sequence
.
end
());
COMPAT_RANDOM
(
m_sequence
.
begin
(),
m_sequence
.
end
());
}
void
Float16PeriodicalRNG
::
gen
(
const
TensorND
&
tensor
)
{
...
...
dnn/test/common/rng.h
浏览文件 @
6e882c1a
...
...
@@ -19,6 +19,16 @@
namespace
megdnn
{
namespace
test
{
// COMPAT_RANDOM(begin, end): shuffle the range [begin, end) in place.
//
// std::random_shuffle is not available from C++17 on, so newer standards go
// through std::shuffle with an explicit engine instead.
//
// Both variants are wrapped in do { } while (0) so that
// `COMPAT_RANDOM(a, b);` is exactly one statement and stays valid inside an
// unbraced if/else.
#if __cplusplus >= 201703L
// The engine is static so that consecutive shuffles issued from the same
// expansion site continue one random sequence. A freshly default-constructed
// engine would restart from the same seed and apply the identical permutation
// on every call. NOTE(review): the static engine is not synchronized; this
// assumes each expansion site is not shuffled concurrently - confirm.
#define COMPAT_RANDOM(begin, end)                         \
    do {                                                  \
        static std::default_random_engine rng_engine;     \
        std::shuffle((begin), (end), rng_engine);         \
    } while (0)
#else
#define COMPAT_RANDOM(begin, end)                \
    do {                                         \
        std::random_shuffle((begin), (end));     \
    } while (0)
#endif
class
RNG
{
protected:
class
RNGxorshf
;
...
...
dnn/test/cuda/argmxx.cpp
浏览文件 @
6e882c1a
...
...
@@ -24,14 +24,15 @@ class ArgmxxRNG final: public RNG {
void
gen
(
const
TensorND
&
tensor
)
override
{
auto
offset
=
tensor
.
layout
.
span
().
low_elem
;
auto
nr_elems
=
tensor
.
layout
.
span
().
dist_elem
();
#define cb(DType) \
if (tensor.layout.dtype == DType()) { \
using ctype = typename DTypeTrait<DType>::ctype; \
auto ptr = tensor.ptr<ctype>(); \
for (size_t i = 0; i < nr_elems; ++i) { \
ptr[offset+i] = i;
\
ptr[offset + i] = i;
\
} \
std::random_shuffle
(ptr + offset, ptr + offset + nr_elems); \
COMPAT_RANDOM
(ptr + offset, ptr + offset + nr_elems); \
}
MEGDNN_FOREACH_COMPUTING_DTYPE
(
cb
);
#undef cb
...
...
dnn/test/cuda/argsort.cpp
浏览文件 @
6e882c1a
...
...
@@ -32,7 +32,7 @@ class ArgsortRNG final : public RNG {
}
else
{
for
(
int
i
=
0
;
i
<
n
;
++
i
)
ptr
[
i
]
=
static_cast
<
T
>
(
i
-
n
/
2
);
std
::
random_shuffle
(
ptr
,
ptr
+
n
);
COMPAT_RANDOM
(
ptr
,
ptr
+
n
);
}
}
...
...
@@ -86,7 +86,7 @@ void run_backward_test(Handle* handle, DType dtype) {
for
(
size_t
j
=
0
;
j
<
n
;
++
j
)
{
ptr
[
j
]
=
j
;
}
std
::
random_shuffle
(
ptr
,
ptr
+
n
);
COMPAT_RANDOM
(
ptr
,
ptr
+
n
);
ptr
+=
n
;
}
}
...
...
dnn/test/cuda/relayout.cpp
浏览文件 @
6e882c1a
...
...
@@ -361,9 +361,8 @@ TEST_F(CUDA, BENCHMARK_RELAYOUT_7) {
for
(
size_t
r
=
0
;
r
<
_dim
.
size
();
r
++
)
permutation
[
r
]
=
r
;
for
(
int
nsample
=
0
;
nsample
<
50
;
nsample
++
)
{
std
::
random_shuffle
(
_dim
.
begin
(),
_dim
.
end
());
std
::
random_shuffle
(
permutation
.
begin
(),
permutation
.
end
());
COMPAT_RANDOM
(
_dim
.
begin
(),
_dim
.
end
());
COMPAT_RANDOM
(
permutation
.
begin
(),
permutation
.
end
());
if
(
!
isTrivial
(
permutation
))
{
run
({{
_dim
[
0
],
_dim
[
1
],
_dim
[
2
],
_dim
[
3
],
_dim
[
4
],
_dim
[
5
],
_dim
[
6
]},
...
...
@@ -451,9 +450,10 @@ TEST_F(CUDA, BENCHMARK_RELAYOUT_5) {
printf
(
"vol %d cur_ratio %lf | %lf
\n
"
,
vol
,
cur_ratio
,
vol_re
);
// printVec(dim);
std
::
random_shuffle
(
dim
.
begin
(),
dim
.
end
());
COMPAT_RANDOM
(
dim
.
begin
(),
dim
.
end
());
while
(
isTrivial
(
permutation
))
{
std
::
random_shuffle
(
permutation
.
begin
(),
permutation
.
end
());
COMPAT_RANDOM
(
permutation
.
begin
(),
permutation
.
end
());
}
run
({{
dim
[
0
],
dim
[
1
],
dim
[
2
],
dim
[
3
],
dim
[
4
]},
dtype
::
Int32
()},
...
...
@@ -603,8 +603,9 @@ TEST_F(CUDA, BENCHMARK_LAST_CONTIG_ALIGN_TEST) {
for
(
size_t
r
=
0
;
r
<
_dim
.
size
();
r
++
)
permutation
[
r
]
=
r
;
for
(
int
nsample
=
0
;
nsample
<
20
;
nsample
++
)
{
std
::
random_shuffle
(
_dim
.
begin
(),
_dim
.
end
()
-
1
);
std
::
random_shuffle
(
permutation
.
begin
(),
permutation
.
end
()
-
1
);
COMPAT_RANDOM
(
_dim
.
begin
(),
_dim
.
end
()
-
1
);
COMPAT_RANDOM
(
permutation
.
begin
(),
permutation
.
end
()
-
1
);
if
(
nsample
<
5
)
_dim
[
5
]
=
(
u
.
gen_single_val
()
/
4
+
1
)
*
4
;
...
...
dnn/test/cuda/sleep.cpp
浏览文件 @
6e882c1a
...
...
@@ -24,7 +24,7 @@ using namespace test;
TEST_F
(
CUDA
,
SLEEP
)
{
auto
opr
=
this
->
handle_cuda
()
->
create_operator
<
Sleep
>
();
auto
opr
=
this
->
handle_cuda
()
->
create_operator
<
megdnn
::
SleepForward
>
();
auto
run
=
[
&
](
float
time
)
->
double
{
opr
->
param
()
=
{
time
};
...
...
dnn/test/rocm/argmxx.cpp
浏览文件 @
6e882c1a
...
...
@@ -24,14 +24,15 @@ class ArgmxxRNG final: public RNG {
void
gen
(
const
TensorND
&
tensor
)
override
{
auto
offset
=
tensor
.
layout
.
span
().
low_elem
;
auto
nr_elems
=
tensor
.
layout
.
span
().
dist_elem
();
#define cb(DType) \
if (tensor.layout.dtype == DType()) { \
using ctype = typename DTypeTrait<DType>::ctype; \
auto ptr = tensor.ptr<ctype>(); \
for (size_t i = 0; i < nr_elems; ++i) { \
ptr[offset+i] = i;
\
ptr[offset + i] = i;
\
} \
std::random_shuffle
(ptr + offset, ptr + offset + nr_elems); \
COMPAT_RANDOM
(ptr + offset, ptr + offset + nr_elems); \
return; \
}
MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT
(
cb
);
...
...
imperative/CMakeLists.txt
浏览文件 @
6e882c1a
...
...
@@ -76,7 +76,11 @@ add_custom_target(_version_ld SOURCES ${VERSION_SCRIPT})
add_subdirectory
(
${
PROJECT_SOURCE_DIR
}
/third_party/pybind11
${
PROJECT_BINARY_DIR
}
/third_party/pybind11
)
pybind11_add_module
(
${
MODULE_NAME
}
NO_EXTRAS
${
SRCS
}
)
target_link_libraries
(
${
MODULE_NAME
}
PRIVATE gen_op_def megbrain megdnn -Wl,--version-script=
${
VERSION_SCRIPT
}
)
if
(
APPLE OR MSVC OR WIN32
)
target_link_libraries
(
${
MODULE_NAME
}
PRIVATE gen_op_def megbrain megdnn
)
else
()
target_link_libraries
(
${
MODULE_NAME
}
PRIVATE gen_op_def megbrain megdnn -Wl,--version-script=
${
VERSION_SCRIPT
}
)
endif
()
if
(
MGE_WITH_DISTRIBUTED
)
message
(
"Imperative configured to link megray"
)
target_link_libraries
(
${
MODULE_NAME
}
PRIVATE megray
)
...
...
@@ -91,6 +95,10 @@ set_target_properties(${MODULE_NAME} PROPERTIES
SUFFIX
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
LIBRARY_OUTPUT_DIRECTORY
${
MEGENGINE_DIR
}
/
${
PACKAGE_NAME
}
/core
)
if
(
APPLE OR MSVC OR WIN32
)
message
(
"-- overwriting SUFFIX at macos and windows before config by set_target_properties"
)
pybind11_extension
(
${
MODULE_NAME
}
)
endif
()
add_dependencies
(
${
MODULE_NAME
}
gen_opr_py _version_ld
)
if
(
MGE_WITH_TEST AND MGE_ENABLE_RTTI
)
...
...
imperative/python/megengine/__init__.py
浏览文件 @
6e882c1a
...
...
@@ -8,6 +8,67 @@
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import
os
import
sys
import
platform
import
ctypes
if
sys
.
platform
==
"win32"
:
lib_path
=
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
"core/lib"
)
dll_paths
=
list
(
filter
(
os
.
path
.
exists
,
[
lib_path
,]))
assert
len
(
dll_paths
)
>
0
kernel32
=
ctypes
.
WinDLL
(
"kernel32.dll"
,
use_last_error
=
True
)
has_load_library_attr
=
hasattr
(
kernel32
,
"AddDllDirectory"
)
old_error_mode
=
kernel32
.
SetErrorMode
(
0x0001
)
kernel32
.
LoadLibraryW
.
restype
=
ctypes
.
c_void_p
if
has_load_library_attr
:
kernel32
.
AddDllDirectory
.
restype
=
ctypes
.
c_void_p
kernel32
.
LoadLibraryExW
.
restype
=
ctypes
.
c_void_p
for
dll_path
in
dll_paths
:
if
sys
.
version_info
>=
(
3
,
8
):
os
.
add_dll_directory
(
dll_path
)
elif
has_load_library_attr
:
res
=
kernel32
.
AddDllDirectory
(
dll_path
)
if
res
is
None
:
err
=
ctypes
.
WinError
(
ctypes
.
get_last_error
())
err
.
strerror
+=
' Error adding "{}" to the DLL search PATH.'
.
format
(
dll_path
)
raise
err
else
:
print
(
"WARN: python or OS env have some issue, may load DLL failed!!!"
)
import
glob
dlls
=
glob
.
glob
(
os
.
path
.
join
(
lib_path
,
"*.dll"
))
path_patched
=
False
for
dll
in
dlls
:
is_loaded
=
False
if
has_load_library_attr
:
res
=
kernel32
.
LoadLibraryExW
(
dll
,
None
,
0x00001100
)
last_error
=
ctypes
.
get_last_error
()
if
res
is
None
and
last_error
!=
126
:
err
=
ctypes
.
WinError
(
last_error
)
err
.
strerror
+=
' Error loading "{}" or one of its dependencies.'
.
format
(
dll
)
raise
err
elif
res
is
not
None
:
is_loaded
=
True
if
not
is_loaded
:
if
not
path_patched
:
os
.
environ
[
"PATH"
]
=
";"
.
join
(
dll_paths
+
[
os
.
environ
[
"PATH"
]])
path_patched
=
True
res
=
kernel32
.
LoadLibraryW
(
dll
)
if
res
is
None
:
err
=
ctypes
.
WinError
(
ctypes
.
get_last_error
())
err
.
strerror
+=
' Error loading "{}" or one of its dependencies.'
.
format
(
dll
)
raise
err
kernel32
.
SetErrorMode
(
old_error_mode
)
from
.core._imperative_rt.utils
import
_set_fork_exec_path_for_timed_func
from
.device
import
*
...
...
imperative/python/megengine/utils/max_recursion_limit.py
浏览文件 @
6e882c1a
...
...
@@ -6,10 +6,14 @@
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import
resource
import
platform
import
sys
import
threading
# The resource module is not available on Windows, so import it only on other platforms
if
platform
.
system
()
!=
"Windows"
:
import
resource
class
AlternativeRecursionLimit
:
r
"""A reentrant context manager for setting global recursion limits.
...
...
@@ -28,14 +32,22 @@ class AlternativeRecursionLimit:
with
self
.
lock
:
if
self
.
count
==
0
:
self
.
orig_py_limit
=
sys
.
getrecursionlimit
()
if
platform
.
system
()
!=
"Windows"
:
(
self
.
orig_rlim_stack_soft
,
self
.
orig_rlim_stack_hard
,
)
=
resource
.
getrlimit
(
resource
.
RLIMIT_STACK
)
# FIXME: https://bugs.python.org/issue34602 - release builds of python3 on
# macOS always hit this issue, and not every user builds python3 from source
try
:
resource
.
setrlimit
(
resource
.
RLIMIT_STACK
,
(
self
.
orig_rlim_stack_hard
,
self
.
orig_rlim_stack_hard
),
)
except
ValueError
as
exc
:
if
platform
.
system
()
!=
"Darwin"
:
raise
exc
# increase recursion limit
sys
.
setrecursionlimit
(
self
.
new_py_limit
)
self
.
count
+=
1
...
...
@@ -45,10 +57,16 @@ class AlternativeRecursionLimit:
self
.
count
-=
1
if
self
.
count
==
0
:
sys
.
setrecursionlimit
(
self
.
orig_py_limit
)
if
platform
.
system
()
!=
"Windows"
:
try
:
resource
.
setrlimit
(
resource
.
RLIMIT_STACK
,
(
self
.
orig_rlim_stack_soft
,
self
.
orig_rlim_stack_hard
),
)
except
ValueError
as
exc
:
if
platform
.
system
()
!=
"Darwin"
:
raise
exc
_max_recursion_limit_context_manager
=
AlternativeRecursionLimit
(
2
**
31
-
1
)
...
...
imperative/python/setup.py
浏览文件 @
6e882c1a
...
...
@@ -9,6 +9,7 @@
import
os
import
re
import
pathlib
import
platform
from
distutils.file_util
import
copy_file
from
setuptools
import
setup
,
find_packages
,
Extension
from
setuptools.command.build_ext
import
build_ext
as
_build_ext
...
...
@@ -29,6 +30,9 @@ class build_ext(_build_ext):
extdir
.
parent
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
modpath
=
self
.
get_ext_fullname
(
ext
.
name
).
split
(
'.'
)
if
platform
.
system
()
==
'Windows'
:
modpath
[
-
1
]
+=
'.pyd'
else
:
modpath
[
-
1
]
+=
'.so'
modpath
=
str
(
pathlib
.
Path
(
*
modpath
).
resolve
())
...
...
@@ -47,6 +51,14 @@ if local_version:
__version__
=
'{}+{}'
.
format
(
__version__
,
local_version
)
packages
=
find_packages
(
exclude
=
[
'test'
])
package_data
=
[
str
(
f
.
relative_to
(
'megengine'
))
for
f
in
pathlib
.
Path
(
'megengine'
,
'core'
,
'include'
).
glob
(
'**/*'
)
]
package_data
+=
[
str
(
f
.
relative_to
(
'megengine'
))
for
f
in
pathlib
.
Path
(
'megengine'
,
'core'
,
'lib'
).
glob
(
'**/*'
)
]
with
open
(
'requires.txt'
)
as
f
:
requires
=
f
.
read
().
splitlines
()
...
...
@@ -63,6 +75,9 @@ setup_kwargs = dict(
author
=
'Megvii Engine Team'
,
author_email
=
email
,
packages
=
packages
,
package_data
=
{
'megengine'
:
package_data
,
},
ext_modules
=
[
PrecompiledExtesion
(
'megengine.core._imperative_rt'
)],
install_requires
=
requires
,
extras_require
=
{
...
...
imperative/python/src/helper.cpp
浏览文件 @
6e882c1a
...
...
@@ -9,15 +9,6 @@
#include "megbrain/utils/mempool.h"
#include "./numpy_dtypes.h"
/*
* demangle typeid, see
* http://stackoverflow.com/questions/281818/unmangling-the-result-of-stdtype-infoname
*/
#ifdef __GNUG__
#include <cstdlib>
#include <memory>
#include <cxxabi.h>
namespace
py
=
pybind11
;
PyTaskDipatcher
py_task_q
=
{};
...
...
@@ -34,10 +25,18 @@ py::module rel_import(py::str name, py::module m, int level) {
return
import
(
name
,
m
.
attr
(
"__dict__"
),
py
::
arg
(
"level"
)
=
level
);
}
/*
* demangle typeid, see
* http://stackoverflow.com/questions/281818/unmangling-the-result-of-stdtype-infoname
*/
#ifdef __GNUG__
#include <cxxabi.h>
#include <cstdlib>
#include <memory>
namespace
{
std
::
string
demangle_typeid
(
const
char
*
name
)
{
int
status
=
-
4
;
// some arbitrary value to eliminate the compiler warning
// enable c++11 by passing the flag -std=c++11 to g++
...
...
@@ -48,7 +47,7 @@ std::string demangle_typeid(const char* name) {
return
(
status
==
0
)
?
res
.
get
()
:
name
;
}
}
}
// namespace
#else
namespace
{
...
...
imperative/python/src/utils.cpp
浏览文件 @
6e882c1a
#include "utils.h"
#ifdef WIN32
#include <stdio.h>
#include <windows.h>
#endif
#include <pybind11/operators.h>
#include <atomic>
...
...
imperative/python/test/integration/test_dp_correctness.py
浏览文件 @
6e882c1a
...
...
@@ -8,6 +8,7 @@
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import
multiprocessing
as
mp
import
os
import
platform
import
re
import
subprocess
import
sys
...
...
@@ -196,6 +197,9 @@ def run_test(
@
pytest
.
mark
.
isolated_distributed
@
pytest
.
mark
.
skipif
(
platform
.
system
()
==
"Windows"
,
reason
=
"windows disable MGB_ENABLE_OPR_MM"
)
def
test_dp_correctness
():
model_name
=
"mnist_model_with_test.mge"
model_path
=
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
model_name
)
...
...
imperative/python/test/unit/functional/test_distributed.py
浏览文件 @
6e882c1a
...
...
@@ -35,7 +35,7 @@ from megengine.functional.distributed import (
platform
.
system
()
==
"Darwin"
,
reason
=
"do not imp GPU mode at macos now"
)
@
pytest
.
mark
.
skipif
(
platform
.
system
()
==
"Windows"
,
reason
=
"
do not imp GPU mode at Windows now
"
platform
.
system
()
==
"Windows"
,
reason
=
"
windows disable MGB_ENABLE_OPR_MM
"
)
@
pytest
.
mark
.
isolated_distributed
def
test_reduce_sum
():
...
...
@@ -77,7 +77,7 @@ def test_reduce_sum():
platform
.
system
()
==
"Darwin"
,
reason
=
"do not imp GPU mode at macos now"
)
@
pytest
.
mark
.
skipif
(
platform
.
system
()
==
"Windows"
,
reason
=
"
do not imp GPU mode at Windows now
"
platform
.
system
()
==
"Windows"
,
reason
=
"
windows disable MGB_ENABLE_OPR_MM
"
)
@
pytest
.
mark
.
isolated_distributed
def
test_broadcast
():
...
...
@@ -115,7 +115,7 @@ def test_broadcast():
platform
.
system
()
==
"Darwin"
,
reason
=
"do not imp GPU mode at macos now"
)
@
pytest
.
mark
.
skipif
(
platform
.
system
()
==
"Windows"
,
reason
=
"
do not imp GPU mode at Windows now
"
platform
.
system
()
==
"Windows"
,
reason
=
"
windows disable MGB_ENABLE_OPR_MM
"
)
@
pytest
.
mark
.
isolated_distributed
def
test_all_gather
():
...
...
@@ -154,7 +154,7 @@ def test_all_gather():
platform
.
system
()
==
"Darwin"
,
reason
=
"do not imp GPU mode at macos now"
)
@
pytest
.
mark
.
skipif
(
platform
.
system
()
==
"Windows"
,
reason
=
"
do not imp GPU mode at Windows now
"
platform
.
system
()
==
"Windows"
,
reason
=
"
windows disable MGB_ENABLE_OPR_MM
"
)
@
pytest
.
mark
.
isolated_distributed
def
test_reduce_scatter_sum
():
...
...
@@ -193,7 +193,7 @@ def test_reduce_scatter_sum():
platform
.
system
()
==
"Darwin"
,
reason
=
"do not imp GPU mode at macos now"
)
@
pytest
.
mark
.
skipif
(
platform
.
system
()
==
"Windows"
,
reason
=
"
do not imp GPU mode at Windows now
"
platform
.
system
()
==
"Windows"
,
reason
=
"
windows disable MGB_ENABLE_OPR_MM
"
)
@
pytest
.
mark
.
isolated_distributed
def
test_all_reduce_sum
():
...
...
@@ -232,7 +232,7 @@ def test_all_reduce_sum():
platform
.
system
()
==
"Darwin"
,
reason
=
"do not imp GPU mode at macos now"
)
@
pytest
.
mark
.
skipif
(
platform
.
system
()
==
"Windows"
,
reason
=
"
do not imp GPU mode at Windows now
"
platform
.
system
()
==
"Windows"
,
reason
=
"
windows disable MGB_ENABLE_OPR_MM
"
)
@
pytest
.
mark
.
isolated_distributed
def
test_all_reduce_max
():
...
...
@@ -271,7 +271,7 @@ def test_all_reduce_max():
platform
.
system
()
==
"Darwin"
,
reason
=
"do not imp GPU mode at macos now"
)
@
pytest
.
mark
.
skipif
(
platform
.
system
()
==
"Windows"
,
reason
=
"
do not imp GPU mode at Windows now
"
platform
.
system
()
==
"Windows"
,
reason
=
"
windows disable MGB_ENABLE_OPR_MM
"
)
@
pytest
.
mark
.
isolated_distributed
def
test_all_reduce_min
():
...
...
@@ -310,7 +310,7 @@ def test_all_reduce_min():
platform
.
system
()
==
"Darwin"
,
reason
=
"do not imp GPU mode at macos now"
)
@
pytest
.
mark
.
skipif
(
platform
.
system
()
==
"Windows"
,
reason
=
"
do not imp GPU mode at Windows now
"
platform
.
system
()
==
"Windows"
,
reason
=
"
windows disable MGB_ENABLE_OPR_MM
"
)
@
pytest
.
mark
.
isolated_distributed
def
test_gather
():
...
...
@@ -352,7 +352,7 @@ def test_gather():
platform
.
system
()
==
"Darwin"
,
reason
=
"do not imp GPU mode at macos now"
)
@
pytest
.
mark
.
skipif
(
platform
.
system
()
==
"Windows"
,
reason
=
"
do not imp GPU mode at Windows now
"
platform
.
system
()
==
"Windows"
,
reason
=
"
windows disable MGB_ENABLE_OPR_MM
"
)
@
pytest
.
mark
.
isolated_distributed
def
test_scatter
():
...
...
@@ -390,7 +390,7 @@ def test_scatter():
platform
.
system
()
==
"Darwin"
,
reason
=
"do not imp GPU mode at macos now"
)
@
pytest
.
mark
.
skipif
(
platform
.
system
()
==
"Windows"
,
reason
=
"
do not imp GPU mode at Windows now
"
platform
.
system
()
==
"Windows"
,
reason
=
"
windows disable MGB_ENABLE_OPR_MM
"
)
@
pytest
.
mark
.
isolated_distributed
def
test_all_to_all
():
...
...
@@ -430,7 +430,7 @@ def test_all_to_all():
platform
.
system
()
==
"Darwin"
,
reason
=
"do not imp GPU mode at macos now"
)
@
pytest
.
mark
.
skipif
(
platform
.
system
()
==
"Windows"
,
reason
=
"
do not imp GPU mode at Windows now
"
platform
.
system
()
==
"Windows"
,
reason
=
"
windows disable MGB_ENABLE_OPR_MM
"
)
@
pytest
.
mark
.
isolated_distributed
def
test_io_remote
():
...
...
imperative/python/test/unit/test_autodiff.py
浏览文件 @
6e882c1a
...
...
@@ -6,6 +6,7 @@
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import
platform
import
weakref
import
numpy
as
np
...
...
@@ -51,6 +52,9 @@ def save_to(self, name="grad"):
@
pytest
.
mark
.
isolated_distributed
@
pytest
.
mark
.
skipif
(
platform
.
system
()
==
"Windows"
,
reason
=
"windows disable MGB_ENABLE_OPR_MM"
)
def
test_dist_grad
():
world_size
=
2
x_np
=
np
.
random
.
rand
(
10
).
astype
(
"float32"
)
...
...
imperative/src/impl/profiler.cpp
浏览文件 @
6e882c1a
...
...
@@ -9,7 +9,17 @@
#include "megbrain/imperative/profiler.h"
#if defined(_MSC_VER) || defined(WIN32)
#include <windows.h>
#define getpid GetCurrentProcessId
#else
#include <sys/unistd.h>
#endif
#if defined(__APPLE__) || defined(__MACOSX)
#include <unistd.h>
#endif
#include <variant>
#include "megbrain/imperative/ops/opr_attr.h"
...
...
imperative/src/impl/proxy_graph.cpp
浏览文件 @
6e882c1a
...
...
@@ -16,6 +16,10 @@
#include "megbrain/imperative/ops/opr_attr.h"
#include "megbrain/imperative/ops/backward_graph.h"
#if __cplusplus >= 201703L
#include <optional>
#endif
namespace
mgb
{
namespace
imperative
{
...
...
imperative/test/CMakeLists.txt
浏览文件 @
6e882c1a
...
...
@@ -38,8 +38,11 @@ if(CXX_SUPPORT_WCLASS_MEMACCESS)
endif
()
if
(
UNIX
)
if
(
APPLE OR ANDROID
)
target_link_libraries
(
imperative_test dl
)
else
()
target_link_libraries
(
imperative_test dl rt
)
endif
()
endif
()
install
(
TARGETS imperative_test RUNTIME DESTINATION test
)
python_module/CMakeLists.txt
浏览文件 @
6e882c1a
...
...
@@ -81,7 +81,10 @@ else()
target_link_libraries
(
mgb megbrain megdnn -Wl,--version-script=
${
VERSION_SCRIPT
}
)
endif
()
target_include_directories
(
mgb PRIVATE
${
PYTHON_INCLUDE_DIRS
}
src/cpp
${
CMAKE_CURRENT_BINARY_DIR
}
${
NUMPY_INCLUDE_DIR
}
)
target_link_libraries
(
mgb
${
PYTHON_LIBRARIES
}
)
# Only Windows builds need to link against PYTHON_LIBRARIES
if
(
MSVC OR WIN32
)
target_link_libraries
(
mgb
${
PYTHON_LIBRARIES
}
)
endif
()
if
(
MGE_WITH_DISTRIBUTED
)
target_link_libraries
(
mgb megray
)
...
...
scripts/cmake-build/BUILD_README.md
浏览文件 @
6e882c1a
...
...
@@ -30,11 +30,17 @@
4e: add C:\Program Files\NVIDIA GPU Computing Toolkit\cudnn-10.1-windows10-x64-v7.6.5.32\cuda\bin to system Path env
4f: add C:\Program Files\NVIDIA GPU Computing Toolkit\TensorRT-6.0.1.5\lib to system Path env
If you skip steps 4d/4e/4f, the CUDA runtime will not be able to find its DLLs
5: install python3 (default version 3.8.3) to /c/Users/${USER}/mge_whl_python_env/3.8.3,
add it to the PATH env, and run python3 -m pip install numpy (needed if you want to build in training mode or build the python whl)
6: install swig from the install GUI (needed if you want to build in training mode or build the python whl)
a: download swig: https://nchc.dl.sourceforge.net/project/swig/swigwin/swigwin-4.0.2/swigwin-4.0.2.zip
b: install swig to /c/Users/${USER}/swigwin-4.0.2
c: apply scripts/whl/windows/fix-ptr-define-issue.patch to /c/Users/${USER}/swigwin-4.0.2
```
### linux host build
```
1: cmake, which version > 3.14.4
2: gcc/g++, which version > 6
2: gcc/g++, which version > 6
, (gcc/g++ >= 7, if need build training)
3: install build-essential git git-lfs gfortran libgfortran-6-dev autoconf gnupg flex bison gperf curl
4: zlib1g-dev gcc-multilib g++-multilib lib32ncurses5-dev libxml2-utils xsltproc unzip libtool:
5: librdmacm-dev rdmacm-utils python3-dev swig python3-numpy texinfo
...
...
@@ -47,6 +53,7 @@
3: brew install python python3 swig coreutils
4: install at least xcode command line tool: https://developer.apple.com/xcode/
5: about cuda: we do not support CUDA on macos
6: python3 -m pip install numpy (needed if you want to build in training mode or build the python whl)
```
### cross build for arm-android
Cross builds targeting arm-android are currently supported from Windows, Linux, and macOS hosts.
...
...
scripts/cmake-build/host_build.sh
浏览文件 @
6e882c1a
...
...
@@ -9,6 +9,7 @@ function usage() {
echo
"-t : Build with training mode, default inference only"
echo
"-m : Build with m32 mode(only for windows build), default m64"
echo
"-r : remove old build dir before make, default off"
echo
"-n : enable new python runtime(valid when training mode with -t, default is legacy runtime)"
echo
"-h : show usage"
echo
"append other cmake config by export EXTRA_CMAKE_ARGS=..."
echo
"example:
$0
-d"
...
...
@@ -22,9 +23,10 @@ MGE_WINDOWS_BUILD_ARCH=x64
MGE_WINDOWS_BUILD_MARCH
=
m64
MGE_ARCH
=
x86_64
REMOVE_OLD_BUILD
=
false
MGE_BUILD_IMPERATIVE_RT
=
OFF
echo
"EXTRA_CMAKE_ARGS:
${
EXTRA_CMAKE_ARGS
}
"
while
getopts
"rhdctm"
arg
while
getopts
"rhdctm
n
"
arg
do
case
$arg
in
d
)
...
...
@@ -48,11 +50,15 @@ do
REMOVE_OLD_BUILD
=
true
;;
m
)
echo
"build for m32(only use for windows)"
echo
"build for m32(only
valid
use for windows)"
MGE_WINDOWS_BUILD_ARCH
=
x86
MGE_WINDOWS_BUILD_MARCH
=
m32
MGE_ARCH
=
i386
;;
n
)
echo
"Enable imperative python wrapper runtime"
MGE_BUILD_IMPERATIVE_RT
=
ON
;;
?
)
echo
"unkonw argument"
usage
...
...
@@ -101,6 +107,7 @@ function cmake_build() {
cmake
\
-DCMAKE_BUILD_TYPE
=
$BUILD_TYPE
\
-DMGE_INFERENCE_ONLY
=
$MGE_INFERENCE_ONLY
\
-DMGE_BUILD_IMPERATIVE_RT
=
${
MGE_BUILD_IMPERATIVE_RT
}
\
-DMGE_WITH_CUDA
=
$MGE_WITH_CUDA
\
-DCMAKE_INSTALL_PREFIX
=
$INSTALL_DIR
\
${
EXTRA_CMAKE_ARGS
}
\
...
...
@@ -112,7 +119,7 @@ function cmake_build() {
function
windows_env_err
()
{
echo
"check windows env failed!!"
echo
"please install
LLVM/clang-cl/cmake/python at Visual Studio Extensions
"
echo
"please install
env refs for: scripts/cmake-build/BUILD_README.md
"
exit
-1
}
...
...
@@ -178,6 +185,25 @@ function prepare_env_for_windows_build() {
export
CPATH
=
$CPATH
:
$NIVIDA_INSTALL_PRE
/
${
TRT_V
}
/include:
$NIVIDA_INSTALL_PRE
/CUDA/
${
CUDA_V
}
/include:
$NIVIDA_INSTALL_PRE
/CUDA/
${
CUDA_V
}
/include/nvtx3:
$PC_CUDNN_INCLUDE_DIRS
export
LIBRARY_PATH
=
$LIBRARY_PATH
:
$LD_LIBRARY_PATH
export
INCLUDE
=
$INCLUDE
:
$CPATH
# The python version is normally configured by the whl build script or the CI script;
# a default version is still needed so the build succeeds when host_build.sh is called directly
if
[[
-z
${
ALREADY_CONFIG_PYTHON_VER
}
]]
then
echo
"config a default python3"
DFT_PYTHON_BIN
=
/c/Users/
${
USER
}
/mge_whl_python_env/3.8.3
if
[
!
-f
"
${
DFT_PYTHON_BIN
}
/python3.exe"
]
;
then
echo
"ERR: can not find
${
DFT_PYTHON_BIN
}
/python3.exe , Invalid env"
windows_env_err
else
echo
"put python3 to env..."
export
PATH
=
${
DFT_PYTHON_BIN
}
:
$PATH
which python3
fi
fi
echo
"export swig pwd to PATH"
export
PATH
=
/c/Users/
${
USER
}
/swigwin-4.0.2::
$PATH
}
WINDOWS_BUILD_TARGET
=
"Ninja all > build.log"
...
...
@@ -218,6 +244,7 @@ function cmake_build_windows() {
vcvarsall.bat
$MGE_WINDOWS_BUILD_ARCH
&& cmake -G "
Ninja
"
\
-DMGE_ARCH=
$MGE_ARCH
\
-DMGE_INFERENCE_ONLY=
$MGE_INFERENCE_ONLY
\
-DMGE_BUILD_IMPERATIVE_RT=
${
MGE_BUILD_IMPERATIVE_RT
}
\
-DMGE_WITH_CUDA=
$MGE_WITH_CUDA
\
-DCMAKE_BUILD_TYPE=
$BUILD_TYPE
\
-DCMAKE_INSTALL_PREFIX:PATH=
$INSTALL_DIR
\
...
...
@@ -230,8 +257,18 @@ function cmake_build_windows() {
${
WINDOWS_BUILD_TARGET
}
"
}
if
[
${
MGE_BUILD_IMPERATIVE_RT
}
=
"ON"
]
&&
[
${
MGE_INFERENCE_ONLY
}
=
"ON"
]
;
then
echo
"ERR: MGE_BUILD_IMPERATIVE_RT(-n) only valid when enable training mode(-t)"
echo
"pls remove -n or add -t"
exit
-1
fi
if
[[
$OS
=
~
"NT"
]]
;
then
if
[
${
MGE_ARCH
}
=
"i386"
]
&&
[
${
MGE_INFERENCE_ONLY
}
=
"OFF"
]
;
then
echo
"ERR: training mode(-t) only support 64 bit mode"
echo
"pls remove -t or remove -m"
exit
-1
fi
config_windows_build_target
cmake_build_windows
$MGE_WITH_CUDA
$MGE_INFERENCE_ONLY
$BUILD_TYPE
else
...
...
scripts/whl/BUILD_PYTHON_WHL_README.md
浏览文件 @
6e882c1a
...
...
@@ -53,10 +53,6 @@
d0: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install --upgrade pip
d1: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install -r python_module/requires-test.txt
d2: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install numpy wheel requests tqdm tabulate
5: install swig from install gui
a: download swig: https://nchc.dl.sourceforge.net/project/swig/swigwin/swigwin-4.0.2/swigwin-4.0.2.zip
b: install swig to /c/Users/${USER}/swigwin-4.0.2
c: apply scripts/whl/windows/fix-ptr-define-issue.patch to c/Users/${USER}/swigwin-4.0.2
```
# how to build
...
...
@@ -90,6 +86,11 @@
```
ALL_PYTHON=3.5.9 ./scripts/whl/macos/macos_build_whl.sh
```
If you want to build with the imperative runtime, set the environment variable BUILD_IMPERATIVE="ON", e.g.:
```
ALL_PYTHON=3.5.9 BUILD_IMPERATIVE="ON" ./scripts/whl/macos/macos_build_whl.sh
```
## build for windows
```
./scripts/whl/windows/windows_build_whl.sh
...
...
@@ -102,5 +103,7 @@
If you want to build the Windows whl with CUDA, and for a specific Python version, e.g.:
```
WINDOWS_WHL_WITH_CUDA="
true
" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh
WINDOWS_WHL_WITH_CUDA="
ON
" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh
```
If you want to build with the imperative runtime, set the environment variable BUILD_IMPERATIVE="ON", e.g.:
```
BUILD_IMPERATIVE="ON" WINDOWS_WHL_WITH_CUDA="ON" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh
```
scripts/whl/macos/macos_build_whl.sh
浏览文件 @
6e882c1a
...
...
@@ -65,16 +65,18 @@ function config_python_env() {
fi
echo
${
ver
}
#config a dir to trick cmake find a null pythonlib
PYTHON_LIBRARY
=
${
PYTHON_DIR
}
lib/
if
[
"
$1
"
=
"3.5.9"
]
;
then
PYTHON_INCLUDE_DIR
=
${
PYTHON_DIR
}
include/python3.5m
PYTHON_LIBRARY
=
${
PYTHON_DIR
}
/lib/libpython3.5m.dylib
elif
[
"
$1
"
=
"3.6.10"
]
;
then
PYTHON_INCLUDE_DIR
=
${
PYTHON_DIR
}
include/python3.6m
PYTHON_LIBRARY
=
${
PYTHON_DIR
}
/lib/libpython3.6m.dylib
elif
[
"
$1
"
=
"3.7.7"
]
;
then
PYTHON_INCLUDE_DIR
=
${
PYTHON_DIR
}
include/python3.7m
PYTHON_LIBRARY
=
${
PYTHON_DIR
}
/lib/libpython3.7m.dylib
elif
[
"
$1
"
=
"3.8.3"
]
;
then
PYTHON_INCLUDE_DIR
=
${
PYTHON_DIR
}
include/python3.8
PYTHON_LIBRARY
=
${
PYTHON_DIR
}
/lib/libpython3.8.dylib
else
echo
"ERR: DO NOT SUPPORT PYTHON VERSION"
echo
"now support list:
${
FULL_PYTHON_VER
}
"
...
...
@@ -82,6 +84,11 @@ function config_python_env() {
fi
}
# Fall back to "OFF" when BUILD_IMPERATIVE is unset or empty; any other value
# (later code compares it against "ON") is left untouched.
[ -n "${BUILD_IMPERATIVE}" ] || BUILD_IMPERATIVE="OFF"
function
do_build
()
{
for
ver
in
${
ALL_PYTHON
}
do
...
...
@@ -89,7 +96,7 @@ function do_build() {
config_python_env
${
ver
}
#check env
if
[
!
-
d
"
$PYTHON_LIBRARY
"
]
;
then
if
[
!
-
f
"
$PYTHON_LIBRARY
"
]
;
then
echo
"ERR: can not find
$PYTHON_LIBRARY
, Invalid python package"
err_env
fi
...
...
@@ -102,14 +109,20 @@ function do_build() {
#append cmake args for config python
export
EXTRA_CMAKE_ARGS
=
"-DCMAKE_PREFIX_PATH=
${
PYTHON_DIR
}
-DPYTHON_LIBRARY=
${
PYTHON_LIBRARY
}
-DPYTHON_INCLUDE_DIR=
${
PYTHON_INCLUDE_DIR
}
"
#config build type to RelWithDebInfo to enable MGB_ENABLE_DEBUG_UTIL etc
export
EXTRA_CMAKE_ARGS
=
${
EXTRA_CMAKE_ARGS
}
"
-DCMAKE_BUILD_TYPE=RelWithDebInfo "
export
EXTRA_CMAKE_ARGS
=
"
${
EXTRA_CMAKE_ARGS
}
-DCMAKE_BUILD_TYPE=RelWithDebInfo "
#call build and install
#FIXME: cmake do not triger update python config, after
#change PYTHON_LIBRARY and PYTHON_INCLUDE_DIR, so add
#-r to remove build cache after a new ver build, which
#will be more slow build than without -r
if
[
${
BUILD_IMPERATIVE
}
=
"ON"
]
;
then
echo
"build whl with IMPERATIVE python rt"
${
SRC_DIR
}
/scripts/cmake-build/host_build.sh
-t
-n
-r
else
echo
"build whl with legacy python rt"
${
SRC_DIR
}
/scripts/cmake-build/host_build.sh
-t
-r
fi
#call setup.py
BUILD_DIR
=
${
SRC_DIR
}
/build_dir/host/MGE_WITH_CUDA_OFF/MGE_INFERENCE_ONLY_OFF/Release/build/
...
...
@@ -121,12 +134,47 @@ function do_build() {
fi
mkdir
-p
staging
if
[
${
BUILD_IMPERATIVE
}
=
"ON"
]
;
then
echo
"build whl with IMPERATIVE python rt"
cp
-a
imperative/python/
{
megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt
}
staging/
cd
${
BUILD_DIR
}
/staging/megengine/core
rt_file
=
`
ls
_imperative_rt.
*
.so
`
echo
"rt file is:
${
rt_file
}
"
if
[[
-z
${
rt_file
}
]]
then
echo
"ERR: can not find valid rt file"
exit
-1
fi
llvm-strip
-s
${
rt_file
}
mv
${
rt_file
}
_imperative_rt.so
echo
"check so valid or not..."
otool_out
=
`
otool
-L
_imperative_rt.so
`
if
[[
"
${
otool_out
}
"
=
~
"ython"
]]
;
then
echo
"ERR: invalid _imperative_rt.so which depend on python lib, detail: log"
echo
${
otool_out
}
exit
-1
else
echo
"valid..."
fi
else
echo
"build whl with legacy python rt"
cp
-a
python_module/
{
megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt
}
staging/
cd
${
BUILD_DIR
}
/staging/megengine/_internal
#FIXME: set lib suffix to dylib may be better, BUT we find after distutils.file_util.copy_file
#will change to .so at macos even we set suffix to dylib, at the same time, macos also support .so
echo
"check so valid or not..."
llvm-strip
-s
_mgb.so
otool_out
=
`
otool
-L
_mgb.so
`
if
[[
"
${
otool_out
}
"
=
~
"ython"
]]
;
then
echo
"ERR: invalid _mgb.so which depend on python lib, detail: log"
echo
${
otool_out
}
exit
-1
else
echo
"valid..."
fi
fi
cd
${
BUILD_DIR
}
/staging
${
PYTHON_DIR
}
/bin/python3 setup.py bdist_wheel
cd
${
BUILD_DIR
}
/staging/dist/
...
...
scripts/whl/windows/windows_build_whl.sh
浏览文件 @
6e882c1a
...
...
@@ -14,8 +14,6 @@ function err_env() {
}
function
append_path_env_and_check
()
{
echo
"export swig pwd to PATH"
export
PATH
=
/c/Users/
${
USER
}
/swigwin-4.0.2::
$PATH
echo
"export vs2019 install path"
export
VS_PATH
=
/c/Program
\
Files
\ \(
x86
\)
/Microsoft
\
Visual
\
Studio/2019/Enterprise
# for llvm-strip
...
...
@@ -62,7 +60,7 @@ function config_python_env() {
if
[[
-z
${
WINDOWS_WHL_WITH_CUDA
}
]]
then
WINDOWS_WHL_WITH_CUDA
=
"
false
"
WINDOWS_WHL_WITH_CUDA
=
"
OFF
"
fi
...
...
@@ -74,26 +72,46 @@ CUBLAS_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublas6
CURAND_LIB
=
"/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/curand64_10.dll"
CUBLASLT_LIB
=
"/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublasLt64_10.dll"
CUDART_LIB
=
"/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cudart64_101.dll"
# Copy the NVIDIA runtime libraries listed below into a single directory.
#
# Arguments:
#   $1 - destination path handed to cp; expected to be an existing directory
#        (the visible caller creates it with mkdir before calling).
# Globals read:
#   TRT_LIB, CUDNN_LIB, CUSOLVER_LIB, CUBLAS_LIB, CURAND_LIB, CUBLASLT_LIB,
#   CUDART_LIB - paths of the library files to copy.
# Globals written:
#   REAL_DST - set to $1 (left global, exactly as before).
function depend_real_copy() {
    REAL_DST=$1
    echo "real copy lib to $1"

    # Both the source and the destination are quoted so that a path containing
    # spaces (the CUDA libs live under "/c/Program Files/...") reaches cp as
    # one argument instead of being word-split.
    local dep_lib
    for dep_lib in \
        "${TRT_LIB}" \
        "${CUDNN_LIB}" \
        "${CUSOLVER_LIB}" \
        "${CUBLAS_LIB}" \
        "${CURAND_LIB}" \
        "${CUBLASLT_LIB}" \
        "${CUDART_LIB}"
    do
        cp "${dep_lib}" "${REAL_DST}"
    done
}
function
copy_more_dll
()
{
# for python whl real use
CP_DST
=
${
BUILD_DIR
}
/staging/megengine/_internal/lib
rm
-rf
${
CP_DST
}
mkdir
${
CP_DST
}
if
[
${
BUILD_IMPERATIVE
}
=
"ON"
]
;
then
echo
"config BUILD_IMPERATIVE core lib dir"
CP_WHL_DST
=
${
BUILD_DIR
}
/staging/megengine/core/lib
else
echo
"config legacy python lib dir"
CP_WHL_DST
=
${
BUILD_DIR
}
/staging/megengine/_internal/lib
fi
rm
-rf
${
CP_WHL_DST
}
mkdir
${
CP_WHL_DST
}
# workround for cpu-only version import failed, use a
# empty.file to triger setup.py to create a null empty
echo
"empty"
>
${
CP_WHL_DST
}
/empty.file
if
[
${
WINDOWS_WHL_WITH_CUDA
}
=
"
true
"
]
;
then
if
[
${
WINDOWS_WHL_WITH_CUDA
}
=
"
ON
"
]
;
then
echo
"copy nvidia lib to whl use...."
cp
"
${
TRT_LIB
}
"
${
CP_DST
}
cp
"
${
CUDNN_LIB
}
"
${
CP_DST
}
cp
"
${
CUSOLVER_LIB
}
"
${
CP_DST
}
cp
"
${
CUBLAS_LIB
}
"
${
CP_DST
}
cp
"
${
CURAND_LIB
}
"
${
CP_DST
}
cp
"
${
CUBLASLT_LIB
}
"
${
CP_DST
}
cp
"
${
CUDART_LIB
}
"
${
CP_DST
}
depend_real_copy
${
CP_WHL_DST
}
fi
}
# Fall back to "OFF" when BUILD_IMPERATIVE is unset or empty; any other value
# (later code compares it against "ON") is left untouched.
[ -n "${BUILD_IMPERATIVE}" ] || BUILD_IMPERATIVE="OFF"
function
do_build
()
{
for
ver
in
${
ALL_PYTHON
}
do
...
...
@@ -118,21 +136,31 @@ function do_build() {
#force LINK a real PYTHON_LIBRARY file, after test we do not find the symbols conflict with python
#export EXTRA_CMAKE_ARGS="-DPYTHON_LIBRARY=${PYTHON_LIBRARY} -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR} "
#config build type to RelWithDebInfo to enable MGB_ENABLE_DEBUG_UTIL etc
export
EXTRA_CMAKE_ARGS
=
${
EXTRA_CMAKE_ARGS
}
"
-DCMAKE_BUILD_TYPE=RelWithDebInfo "
export
EXTRA_CMAKE_ARGS
=
"
${
EXTRA_CMAKE_ARGS
}
-DCMAKE_BUILD_TYPE=RelWithDebInfo "
#call build and install
#FIXME: cmake do not triger update python config, after
#change PYTHON_LIBRARY and PYTHON_INCLUDE_DIR, so add
#-r to remove build cache after a new ver build, which
#will be more slow build than without -r
if
[
${
WINDOWS_WHL_WITH_CUDA
}
=
"true"
]
;
then
BUILD_ARGS
=
" -t -r"
if
[
${
BUILD_IMPERATIVE
}
=
"ON"
]
;
then
echo
"build whl with IMPERATIVE python rt"
BUILD_ARGS
=
"
${
BUILD_ARGS
}
-n "
else
echo
"build whl with legacy python rt"
fi
if
[
${
WINDOWS_WHL_WITH_CUDA
}
=
"ON"
]
;
then
echo
"build windows whl with cuda"
${
SRC_DIR
}
/scripts/cmake-build/host_build.sh
-t
-r
-c
BUILD_ARGS
=
"
${
BUILD_ARGS
}
-c "
else
echo
"build windows whl with cpu only"
${
SRC_DIR
}
/scripts/cmake-build/host_build.sh
-t
-r
fi
echo
"host_build.sh BUILD_ARGS:
${
BUILD_ARGS
}
"
${
SRC_DIR
}
/scripts/cmake-build/host_build.sh
${
BUILD_ARGS
}
#call setup.py
BUILD_DIR
=
${
SRC_DIR
}
/build_dir/host/build/
cd
${
BUILD_DIR
}
...
...
@@ -143,10 +171,27 @@ function do_build() {
fi
mkdir
-p
staging
if
[
${
BUILD_IMPERATIVE
}
=
"ON"
]
;
then
echo
"build whl with IMPERATIVE python rt"
cp
-a
imperative/python/
{
megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt
}
staging/
cd
${
BUILD_DIR
}
/staging/megengine/core
rt_file
=
`
ls
_imperative_rt.
*
.pyd
`
echo
"rt file is:
${
rt_file
}
"
if
[[
-z
${
rt_file
}
]]
then
echo
"ERR: can not find valid rt file"
exit
-1
fi
llvm-strip
-s
${
rt_file
}
mv
${
rt_file
}
_imperative_rt.pyd
else
echo
"build whl with legacy python rt"
cp
-a
python_module/
{
megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt
}
staging/
cd
${
BUILD_DIR
}
/staging/megengine/_internal
llvm-strip
-s
_mgb.pyd
fi
copy_more_dll
cd
${
BUILD_DIR
}
/staging
${
PYTHON_DIR
}
/python3 setup.py bdist_wheel
...
...
@@ -175,5 +220,6 @@ function third_party_prepare() {
}
######################
export
ALREADY_CONFIG_PYTHON_VER
=
"yes"
third_party_prepare
do_build
src/core/impl/graph/seq_sublinear_memory.cpp
浏览文件 @
6e882c1a
...
...
@@ -33,6 +33,11 @@ class RNGxorshf {
uint64_t
s
[
2
];
public:
#if __cplusplus >= 201703L
typedef
uint64_t
result_type
;
static
constexpr
uint64_t
min
()
{
return
0
;
}
static
constexpr
uint64_t
max
()
{
return
UINT64_MAX
;
}
#endif
RNGxorshf
(
uint64_t
seed
)
{
std
::
mt19937_64
gen
(
seed
);
s
[
0
]
=
gen
();
...
...
@@ -936,8 +941,12 @@ void SeqModifierForSublinearMemory::ActionSearcherSingleCN::search_genetic() {
}
}
m_cur_records
=
records
;
#if __cplusplus >= 201703L
std
::
shuffle
(
perm
.
begin
(),
perm
.
end
(),
rng
);
#else
std
::
random_shuffle
(
perm
.
begin
(),
perm
.
end
(),
[
&
](
size_t
x
)
{
return
rng
()
%
x
;
});
#endif
for
(
size_t
i
=
0
;
i
<
length
;
++
i
)
{
invoke_search
(
mutation
(
mutation
(
records
[
i
].
first
)));
invoke_search
(
crossover
(
records
[
i
].
first
,
records
[
perm
[
i
]].
first
));
...
...
src/opr/test/blas.cpp
浏览文件 @
6e882c1a
...
...
@@ -705,7 +705,12 @@ TEST(TestOprBlas, MatrixInverse) {
}
auto
ptr
=
inp
[
0
]
->
ptr
<
float
>
();
for
(
size_t
i
=
0
;
i
<
batch
;
++
i
,
ptr
+=
n
*
n
)
{
#if __cplusplus >= 201703L
std
::
default_random_engine
rng_engine
;
std
::
shuffle
(
perm
.
begin
(),
perm
.
end
(),
rng_engine
);
#else
std
::
random_shuffle
(
perm
.
begin
(),
perm
.
end
());
#endif
for
(
size_t
j
=
0
;
j
<
n
;
++
j
)
{
ptr
[
j
*
n
+
perm
[
j
]]
+=
5
;
}
...
...
src/opr/test/muxing.cpp
浏览文件 @
6e882c1a
...
...
@@ -36,7 +36,12 @@ void run_all_gather(const std::vector<size_t>& axis_size, bool& success,
sleep_time
.
push_back
(
i
*
0.05
+
0.1
);
tot_axis_size
+=
axis_size
[
i
];
}
#if __cplusplus >= 201703L
std
::
default_random_engine
rng_engine
;
std
::
shuffle
(
sleep_time
.
begin
(),
sleep_time
.
end
(),
rng_engine
);
#else
std
::
random_shuffle
(
sleep_time
.
begin
(),
sleep_time
.
end
());
#endif
auto
constexpr
DEVICE_TYPE
=
CompNode
::
DeviceType
::
CUDA
;
size_t
nr_dev
=
std
::
min
<
size_t
>
(
...
...
test/CMakeLists.txt
浏览文件 @
6e882c1a
...
...
@@ -18,7 +18,11 @@ endif()
add_executable
(
megbrain_test
${
SOURCES
}
)
target_link_libraries
(
megbrain_test gtest
)
target_link_libraries
(
megbrain_test megengine
)
if
(
MSVC OR WIN32
)
target_link_libraries
(
megbrain_test megbrain megdnn
)
else
()
target_link_libraries
(
megbrain_test megengine
)
endif
()
if
(
CXX_SUPPORT_WCLASS_MEMACCESS
)
if
(
MGE_WITH_CUDA
)
target_compile_options
(
megbrain_test PRIVATE
"$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-class-memaccess>"
...
...
@@ -28,10 +32,12 @@ if(CXX_SUPPORT_WCLASS_MEMACCESS)
endif
()
endif
()
if
(
APPLE OR ANDROID
)
if
(
UNIX
)
if
(
APPLE OR ANDROID
)
target_link_libraries
(
megbrain_test dl
)
else
()
else
()
target_link_libraries
(
megbrain_test dl rt
)
endif
()
endif
()
if
(
MGE_WITH_DISTRIBUTED
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录