Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
d7bb62cf
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
404
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
d7bb62cf
编写于
6月 02, 2020
作者:
M
Megvii Engine Team
提交者:
Xu Xinran
6月 19, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor(mgb): move mm_handler from python module into opr-mm
GitOrigin-RevId: f401ce86033da83a91ebea3c119fc7af54a66ba0
上级
84068a6b
变更
10
隐藏空白更改
内联
并排
Showing
10 changed files
with
97 additions
and
133 deletions
+97
-133
cmake/zmq.cmake
cmake/zmq.cmake
+1
-0
python_module/CMakeLists.txt
python_module/CMakeLists.txt
+2
-20
python_module/src/cpp/megbrain_config.cpp
python_module/src/cpp/megbrain_config.cpp
+37
-0
python_module/src/cpp/opr_defs.cpp
python_module/src/cpp/opr_defs.cpp
+1
-1
src/CMakeLists.txt
src/CMakeLists.txt
+9
-0
src/opr-mm/impl/mm_handler.cpp
src/opr-mm/impl/mm_handler.cpp
+36
-82
src/opr-mm/impl/zmq_rpc.cpp
src/opr-mm/impl/zmq_rpc.cpp
+2
-2
src/opr-mm/include/megbrain/opr/mm_handler.h
src/opr-mm/include/megbrain/opr/mm_handler.h
+8
-27
src/opr-mm/include/megbrain/opr/zmq_rpc.h
src/opr-mm/include/megbrain/opr/zmq_rpc.h
+1
-1
src/opr-mm/proto/mm_handler.proto
src/opr-mm/proto/mm_handler.proto
+0
-0
未找到文件。
cmake/zmq.cmake
浏览文件 @
d7bb62cf
...
...
@@ -14,6 +14,7 @@ ExternalProject_add(
)
set
(
ZMQ_INC
${
ZMQ_BUILD_DIR
}
/include
)
include_directories
(
${
ZMQ_INC
}
)
file
(
MAKE_DIRECTORY
${
ZMQ_INC
}
)
add_library
(
libzmq STATIC IMPORTED GLOBAL
)
...
...
python_module/CMakeLists.txt
浏览文件 @
d7bb62cf
...
...
@@ -12,14 +12,6 @@ set(SWIG_SRC src/swig/mgb.i)
set
(
CMAKE_SWIG_FLAGS -Wall -threads -py3 -modern -DSWIGWORDSIZE64
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-Wno-unused-parameter"
)
if
(
MGE_WITH_DISTRIBUTED
)
file
(
GLOB_RECURSE PROTO_FILES RELATIVE
${
CMAKE_CURRENT_SOURCE_DIR
}
"src/proto/*.proto"
)
PROTOBUF_GENERATE_CPP_WITH_ROOT
(
GRPC_SRCS GRPC_HDRS
${
CMAKE_CURRENT_SOURCE_DIR
}
${
PROTO_FILES
}
)
add_custom_target
(
mgb_proto_target DEPENDS
${
GRPC_SRCS
}
${
GRPC_HDRS
}
${
PROTOBUF_PROTOC_EXECUTABLE
}
)
endif
()
file
(
GLOB_RECURSE OPR_DECL_SRCS
"
${
PROJECT_SOURCE_DIR
}
/src/**/*.oprdecl"
)
file
(
GLOB_RECURSE PYTHON_SRCS setup.py
src/python/*.py
...
...
@@ -55,11 +47,7 @@ add_custom_command(
add_custom_target
(
mgb_opr_py DEPENDS
${
CMAKE_CURRENT_BINARY_DIR
}
/megengine/_internal/opr.py
)
set
(
SRCS src/cpp/craniotome.cpp src/cpp/function_replace.cpp src/cpp/intbx.cpp src/cpp/bfloat16.cpp src/cpp/megbrain_config.cpp src/cpp/megbrain_pubapi.cpp src/cpp/megbrain_serialize.cpp src/cpp/megbrain_wrap.cpp src/cpp/mm_handler.cpp src/cpp/opr_defs.cpp src/cpp/opr_helper.cpp src/cpp/plugin.cpp src/cpp/python_helper.cpp
)
if
(
MGE_WITH_DISTRIBUTED
)
list
(
APPEND SRCS src/cpp/zmq_rpc.cpp
)
endif
()
set
(
SRCS src/cpp/craniotome.cpp src/cpp/function_replace.cpp src/cpp/intbx.cpp src/cpp/bfloat16.cpp src/cpp/megbrain_config.cpp src/cpp/megbrain_pubapi.cpp src/cpp/megbrain_serialize.cpp src/cpp/megbrain_wrap.cpp src/cpp/opr_defs.cpp src/cpp/opr_helper.cpp src/cpp/plugin.cpp src/cpp/python_helper.cpp
)
include
(
UseSWIG
)
set_property
(
SOURCE
${
SWIG_SRC
}
PROPERTY CPLUSPLUS ON
)
...
...
@@ -70,7 +58,7 @@ set_property(SOURCE ${SWIG_SRC} PROPERTY SWIG_FLAGS -I${PROJECT_SOURCE_DIR}/src/
set
(
SWIG_OUTFILE_DIR
${
CMAKE_CURRENT_BINARY_DIR
}
)
set
(
CMAKE_SWIG_OUTDIR
${
CMAKE_CURRENT_BINARY_DIR
}
/megengine/_internal
)
swig_add_library
(
mgb LANGUAGE python SOURCES
${
SWIG_SRC
}
${
GRPC_SRCS
}
${
SRCS
}
)
swig_add_library
(
mgb LANGUAGE python SOURCES
${
SWIG_SRC
}
${
SRCS
}
)
set
(
VERSION_SCRIPT
${
CMAKE_CURRENT_SOURCE_DIR
}
/src/version.ld
)
add_custom_target
(
version_ld SOURCES
${
VERSION_SCRIPT
}
)
...
...
@@ -81,12 +69,6 @@ target_include_directories(_mgb PRIVATE ${PYTHON_INCLUDE_DIRS} src/cpp ${CMAKE_C
target_link_libraries
(
_mgb
${
PYTHON_LIBRARIES
}
)
add_dependencies
(
_mgb mgb_opr_py version_ld
)
if
(
MGE_WITH_DISTRIBUTED
)
add_dependencies
(
_mgb mgb_proto_target
)
target_link_libraries
(
_mgb libprotobuf libzmq
)
set
(
CPPZMQ_INC
${
PROJECT_SOURCE_DIR
}
/third_party/cppzmq
)
target_include_directories
(
_mgb PRIVATE
${
CPPZMQ_INC
}
)
endif
()
add_custom_command
(
TARGET _mgb POST_BUILD
...
...
python_module/src/cpp/megbrain_config.cpp
浏览文件 @
d7bb62cf
...
...
@@ -19,6 +19,10 @@
#include <dlfcn.h>
#if MGB_ENABLE_OPR_MM
#include "megbrain/opr/mm_handler.h"
#endif
#if MGB_CUDA
#include <cuda.h>
#endif
...
...
@@ -276,4 +280,37 @@ std::vector<std::pair<uint64_t, std::string>> _config::dump_registered_oprs() {
#endif
}
#if MGB_ENABLE_OPR_MM
/*!
 * Create the mm server (declared in src/cpp/megbrain_config.h).
 *
 * \param server_addr address forwarded unchanged to create_zmqrpc_server()
 * \param port requested port; 0 is permitted and leaves the choice of port
 *      to zmqrpc
 * \return the value returned by create_zmqrpc_server()
 */
int _config::create_mm_server(const std::string& server_addr, int port) {
    const int bound_port = create_zmqrpc_server(server_addr, port);
    return bound_port;
}
/*!
 * Issue a group_barrier request to the server at "server_addr:port".
 *
 * \param server_addr host part of the server address
 * \param port port part of the server address
 * \param size group size sent to the server; the response must equal it
 * \param rank rank of the caller; must be smaller than \p size
 *
 * Asserts (via mgb_assert) that rank < size, that the response is non-zero
 * (reported as "rank already registered") and that the response equals
 * \p size.
 */
void _config::group_barrier(const std::string& server_addr, int port,
                            uint32_t size, uint32_t rank) {
    // rank/size/rsp are uint32_t: format them with %u (after an explicit
    // cast to unsigned) instead of %d, which mismatches the signedness and
    // misprints values above INT_MAX.
    mgb_assert(rank < size, "invalid rank %u", static_cast<unsigned>(rank));

    auto group_mgr = std::make_shared<GroupClientProxy>(
            ssprintf("%s:%d", server_addr.c_str(), port));
    uint32_t rsp = group_mgr->group_barrier(size, rank);

    mgb_assert(rsp != 0, "rank already registered: %u",
               static_cast<unsigned>(rank));
    mgb_assert(size == rsp, "inconsistent size: %u, expect %u",
               static_cast<unsigned>(size), static_cast<unsigned>(rsp));
}
#else
/*!
 * Stub compiled when MGB_ENABLE_OPR_MM is off: reports that OPR_MM support
 * is unavailable by raising mgb::MegBrainError through mgb_throw.
 *
 * \param server_addr unused
 * \param port unused
 */
int _config::create_mm_server(const std::string& server_addr, int port) {
    mgb_throw(mgb::MegBrainError, "OPR_MM support disabled at compile time");
    // Presumably never reached once mgb_throw has raised; kept so the
    // function still has a return statement.
    return 0;
}
/*!
 * Stub compiled when MGB_ENABLE_OPR_MM is off: reports that OPR_MM support
 * is unavailable by raising mgb::MegBrainError through mgb_throw.
 *
 * All parameters are unused.
 */
void _config::group_barrier(const std::string& server_addr, int port,
                            uint32_t size, uint32_t rank) {
    mgb_throw(mgb::MegBrainError, "OPR_MM support disabled at compile time");
}
#endif
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
python_module/src/cpp/opr_defs.cpp
浏览文件 @
d7bb62cf
...
...
@@ -12,7 +12,7 @@
#include "./python_helper.h"
#if MGB_ENABLE_OPR_MM
#include "mm_handler.h"
#include "megbrain/opr/mm_handler.h"
#endif
#include "megbrain/opr/io.h"
...
...
src/CMakeLists.txt
浏览文件 @
d7bb62cf
...
...
@@ -10,6 +10,10 @@ endif()
if
(
MGE_WITH_DISTRIBUTED
)
file
(
GLOB_RECURSE SOURCES_ opr-mm/impl/*.cpp opr-mm/impl/*.inl
)
list
(
APPEND SOURCES
${
SOURCES_
}
)
file
(
GLOB_RECURSE PROTO_FILES RELATIVE
${
CMAKE_CURRENT_SOURCE_DIR
}
"../src/opr-mm/proto/*.proto"
)
PROTOBUF_GENERATE_CPP_WITH_ROOT
(
GRPC_SRCS GRPC_HDRS
${
CMAKE_CURRENT_SOURCE_DIR
}
${
PROTO_FILES
}
)
add_custom_target
(
mgb_proto_target DEPENDS
${
GRPC_SRCS
}
${
GRPC_HDRS
}
${
PROTOBUF_PROTOC_EXECUTABLE
}
)
list
(
APPEND SOURCES
${
GRPC_SRCS
}
)
endif
()
set
(
MGB_INC
${
PROJECT_BINARY_DIR
}
/genfiles core/include gopt/include opr/include plugin/include serialization/include
)
...
...
@@ -52,6 +56,11 @@ if(CXX_SUPPORT_WCLASS_MEMACCESS)
endif
()
target_link_libraries
(
megbrain megdnn
)
if
(
MGE_WITH_DISTRIBUTED
)
add_dependencies
(
megbrain mgb_proto_target
)
target_link_libraries
(
megbrain libprotobuf libzmq
)
set
(
CPPZMQ_INC
${
PROJECT_SOURCE_DIR
}
/third_party/cppzmq
)
# FIXME: add CMAKE_CURRENT_BINARY_DIR for including mm_handler.pb.h
target_include_directories
(
megbrain PRIVATE
${
CPPZMQ_INC
}
${
CMAKE_CURRENT_BINARY_DIR
}
)
target_link_libraries
(
megbrain megray
)
endif
()
target_link_libraries
(
megbrain
${
MGE_CUDA_LIBS
}
)
...
...
python_module/src/cpp
/mm_handler.cpp
→
src/opr-mm/impl
/mm_handler.cpp
浏览文件 @
d7bb62cf
...
...
@@ -7,13 +7,14 @@
*
*/
#include "mm_handler.h"
#include "megbrain/opr/mm_handler.h"
#include "megbrain/exception.h"
#include "megbrain_config.h"
#include "megbrain_build_config.h"
#if MGB_ENABLE_OPR_MM
#include "zmq_rpc.h"
#include "megbrain/opr/zmq_rpc.h"
#include "mm_handler.pb.h"
#include <future>
/* ======================== GroupServerProxy ========================== */
...
...
@@ -128,17 +129,22 @@ void GroupServerProxy::group_barrier(void* input_ptr, size_t input_len,
Request req; \
Response rsp;
#define SOLVE_REQUEST(name, req, rsp) \
std::string req_str; \
mgb_assert(req.SerializeToString(&req_str)); \
zmq::message_t send(req_str.length() + name.length() + 1); \
zmq::message_t recv; \
memcpy(send.data(), name.data(), name.length() + 1); \
memcpy((char*)send.data() + name.length() + 1, req_str.data(), \
req_str.length()); \
m_stub->request(send, recv);
\
#define SOLVE_REQUEST(name, req, rsp)
\
std::string req_str;
\
mgb_assert(req.SerializeToString(&req_str));
\
zmq::message_t send(req_str.length() + name.length() + 1);
\
zmq::message_t recv;
\
memcpy(send.data(), name.data(), name.length() + 1);
\
memcpy((char*)send.data() + name.length() + 1, req_str.data(),
\
req_str.length());
\
static_cast<ZmqRpc::ZmqRpcClient*>(m_stub)->request(send, recv);
\
mgb_assert(rsp.ParseFromArray(recv.data(), recv.size()));
/*!
 * Store \p server_addr in m_addr and keep the client returned by
 * ZmqRpc::ZmqRpcClient::get_client() for the endpoint "tcp://" + server_addr
 * in m_stub.
 */
GroupClientProxy::GroupClientProxy(const std::string& server_addr)
        : m_addr(server_addr),
          m_stub(ZmqRpc::ZmqRpcClient::get_client("tcp://" + server_addr)) {}
uint64_t
GroupClientProxy
::
opr_register
(
const
std
::
string
&
key
,
size_t
nr_devices
,
uint32_t
rank
,
uintptr_t
stream
)
{
INFO_INIT
(
mm_handler
,
opr_register
,
OprRegister
)
...
...
@@ -199,78 +205,26 @@ uint32_t GroupClientProxy::group_barrier(uint32_t size, uint32_t rank) {
#undef INFO_INIT
#undef SOLVE_REQUEST
/* ======================== ZmqRpcServerMgr ========================== */
class
ZmqRpcServerMgr
{
struct
ServerInfo
{
std
::
unique_ptr
<
ZmqRpc
::
ZmqRpcServer
>
server
;
};
public:
int
create_zmqrpc_server
(
const
std
::
string
&
server_addr
,
int
port
,
std
::
unique_ptr
<
ZmqRpc
::
ZmqRpcServerImpl
>
service
)
{
MGB_LOCK_GUARD
(
m_mtx
);
auto
server
=
std
::
make_unique
<
ZmqRpc
::
ZmqRpcServer
>
(
"tcp://"
+
server_addr
,
port
,
std
::
move
(
service
));
port
=
server
->
port
();
if
(
port
==
-
1
)
{
return
-
1
;
}
auto
full_srv_addr
=
ssprintf
(
"%s:%d"
,
server_addr
.
c_str
(),
port
);
server
->
run
();
auto
ins
=
m_addr2server
.
emplace
(
full_srv_addr
,
ServerInfo
{
std
::
move
(
server
)});
mgb_assert
(
ins
.
second
);
return
port
;
}
static
ZmqRpcServerMgr
*
get_zmqrpc_server_mgr
()
{
static
ZmqRpcServerMgr
mgr
;
return
&
mgr
;
}
private:
std
::
unordered_map
<
std
::
string
,
ServerInfo
>
m_addr2server
;
std
::
mutex
m_mtx
;
//! Owns one ZmqRpc server instance; used as the mapped value of the
//! address -> server table.
struct ServerInfo {
    std::unique_ptr<ZmqRpc::ZmqRpcServer> server;
};
/*! see definition : src/cpp/megbrain_config.h.
* Create mm server. port 0 is permitted, leave zmqrpc to decide which port
* should be used.
*/
int
_config
::
create_mm_server
(
const
std
::
string
&
server_addr
,
int
port
)
{
return
ZmqRpcServerMgr
::
get_zmqrpc_server_mgr
()
->
create_zmqrpc_server
(
server_addr
,
port
,
std
::
make_unique
<
GroupServerProxy
>
());
}
/* ======================== Group Barrier ========================== */
/*! see definition : src/cpp/megbrain_config.h.
* Block until all ranks in the group reach this barrier
*/
void
_config
::
group_barrier
(
const
std
::
string
&
server_addr
,
int
port
,
uint32_t
size
,
uint32_t
rank
)
{
mgb_assert
(
rank
<
size
,
"invalid rank %d"
,
rank
);
auto
group_mgr
=
std
::
make_shared
<
GroupClientProxy
>
(
ssprintf
(
"%s:%d"
,
server_addr
.
c_str
(),
port
));
uint32_t
rsp
=
group_mgr
->
group_barrier
(
size
,
rank
);
mgb_assert
(
rsp
!=
0
,
"rank already registered: %d"
,
rank
);
mgb_assert
(
size
==
rsp
,
"inconsistent size: %d, expect %d"
,
size
,
rsp
);
}
#else
int
_config
::
create_mm_server
(
const
std
::
string
&
server_addr
,
int
port
)
{
mgb_throw
(
mgb
::
MegBrainError
,
"distributed mode disabled at compile time"
);
return
0
;
}
void
_config
::
group_barrier
(
const
std
::
string
&
server_addr
,
int
port
,
uint32_t
size
,
uint32_t
rank
)
{
mgb_throw
(
mgb
::
MegBrainError
,
"distributed mode disabled at compile time"
);
/*!
 * Create a ZmqRpc server backed by a GroupServerProxy on
 * "tcp://" + server_addr, run it, and keep it alive in a function-local
 * table keyed by "server_addr:port".
 *
 * The table and the mutex guarding it are function-local statics; the whole
 * body runs under MGB_LOCK_GUARD(mtx).
 *
 * \param server_addr address the server is created on
 * \param port requested port; replaced by the value reported by
 *      ZmqRpcServer::port() once the server has been constructed
 * \return the port reported by the server, or -1 if the server reports -1
 */
int create_zmqrpc_server(const std::string& server_addr, int port) {
    static std::unordered_map<std::string, ServerInfo> addr2server;
    static std::mutex mtx;
    MGB_LOCK_GUARD(mtx);

    auto service = std::make_unique<GroupServerProxy>();
    auto server = std::make_unique<ZmqRpc::ZmqRpcServer>(
            "tcp://" + server_addr, port, std::move(service));

    port = server->port();
    // NOTE(review): the pre-refactor implementation returned -1 at this
    // point instead of running and registering the server; presumably -1
    // from port() signals that no port could be bound -- confirm against
    // ZmqRpcServer::port().
    if (port == -1) {
        return -1;
    }

    auto full_srv_addr = ssprintf("%s:%d", server_addr.c_str(), port);
    server->run();

    // The same "addr:port" key must not already be registered.
    auto ins = addr2server.emplace(full_srv_addr,
                                   ServerInfo{std::move(server)});
    mgb_assert(ins.second);
    return port;
}
#endif
...
...
python_module/src/cpp
/zmq_rpc.cpp
→
src/opr-mm/impl
/zmq_rpc.cpp
浏览文件 @
d7bb62cf
#include "zmq_rpc.h"
#include "megbrain/opr/zmq_rpc.h"
#include "megbrain/exception.h"
#include "megbrain_config.h"
#include "megbrain_build_config.h"
#if MGB_CUDA
#include <unistd.h>
...
...
python_module/src/cpp
/mm_handler.h
→
src/opr-mm/include/megbrain/opr
/mm_handler.h
浏览文件 @
d7bb62cf
...
...
@@ -13,10 +13,7 @@
#if MGB_ENABLE_OPR_MM
#include "zmq_rpc.h"
#include "megbrain/opr/collective_comm.h"
#include "mm_handler.pb.h"
using
namespace
mgb
;
using
namespace
opr
;
...
...
@@ -31,10 +28,7 @@ class GroupClientProxy
public:
virtual
~
GroupClientProxy
()
=
default
;
GroupClientProxy
(
const
std
::
string
&
server_addr
)
:
m_addr
(
server_addr
),
m_stub
{
ZmqRpc
::
ZmqRpcClient
::
get_client
(
"tcp://"
+
server_addr
)}
{
}
GroupClientProxy
(
const
std
::
string
&
server_addr
);
//! graph registration, assign graph_id to worker.
uint64_t
opr_register
(
const
std
::
string
&
key
,
size_t
nr_devices
,
uint32_t
rank
,
...
...
@@ -50,33 +44,20 @@ public:
uint32_t
group_barrier
(
uint32_t
size
,
uint32_t
rank
)
override
;
//! thread safe to create handler with address
static
GroupClientProxy
*
get_handler
(
const
std
::
string
&
addr
)
{
static
std
::
unordered_map
<
std
::
string
,
std
::
unique_ptr
<
GroupClientProxy
>>
addr2handler
;
static
std
::
mutex
mtx
;
MGB_LOCK_GUARD
(
mtx
);
auto
it
=
addr2handler
.
emplace
(
addr
,
nullptr
);
if
(
!
it
.
second
)
{
mgb_assert
(
it
.
first
->
second
->
m_addr
==
addr
);
return
it
.
first
->
second
.
get
();
}
else
{
auto
handler
=
std
::
make_unique
<
GroupClientProxy
>
(
addr
);
auto
handler_ptr
=
handler
.
get
();
it
.
first
->
second
=
std
::
move
(
handler
);
return
handler_ptr
;
}
}
//! \return a const reference to m_addr, the address string stored in this proxy
const
std
::
string
&
get_addr
()
const
{
return
m_addr
;
}
private:
const
std
::
string
m_addr
;
ZmqRpc
::
ZmqRpcClient
*
m_stub
;
void
*
m_stub
;
};
/* ======================== ZmqRpcServerMgr ========================== */
int
create_zmqrpc_server
(
const
std
::
string
&
server_addr
,
int
port
);
#endif
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
python_module/src/cpp
/zmq_rpc.h
→
src/opr-mm/include/megbrain/opr
/zmq_rpc.h
浏览文件 @
d7bb62cf
...
...
@@ -101,4 +101,4 @@ private:
std
::
vector
<
std
::
shared_ptr
<
zmq
::
socket_t
>>
m_own_sockets
;
};
}
// namespace ZmqRpc
#endif
\ No newline at end of file
#endif
python_module/src
/proto/mm_handler.proto
→
src/opr-mm
/proto/mm_handler.proto
浏览文件 @
d7bb62cf
文件已移动
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录