Commit 11b121a7, authored Sep 05, 2020 by Megvii Engine Team
fix(mgb/jit): link libdevice.bc when generate nvvm ir
GitOrigin-RevId: 49289d65c4d627964c0d53a48f5f911db98012f6
Parent: aa7f28b8
Showing 4 changed files with 110 additions and 13 deletions (+110, -13):
src/jit/impl/mlir/compiler.cpp (+77, -8)
src/jit/impl/mlir/ir/common.cpp (+4, -1)
src/jit/impl/mlir/ir/lower_to_gpu_pass.cpp (+0, -3)
src/jit/test/codegen.cpp (+29, -1)
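Why this change: the GPU-to-NVVM lowering turns math ops (exp, log, tanh, ...) into calls to NVIDIA's __nv_* intrinsics, whose definitions live only in the CUDA toolkit's libdevice*.bc bitcode library. Unless that bitcode is linked into the generated module, PTX compilation hits undefined symbols. The new translate_module_to_nvvm_ir_and_link_device in compiler.cpp below does exactly that; the sketch here isolates the core linking step, assuming an LLVM-10-era API (the helper name link_libdevice and its signature are illustrative, not part of this commit):

// Minimal sketch, assuming an LLVM-10-era API; link_libdevice is an
// illustrative helper, not part of this commit.
#include <memory>
#include <string>

#include <llvm/IR/Module.h>
#include <llvm/IRReader/IRReader.h>
#include <llvm/Linker/Linker.h>
#include <llvm/Support/SourceMgr.h>

bool link_libdevice(llvm::Module& module, const std::string& libdevice_path) {
    llvm::SMDiagnostic err;
    // Parse libdevice.bc into the same LLVMContext as the kernel module.
    std::unique_ptr<llvm::Module> libdevice =
            llvm::parseIRFile(libdevice_path, err, module.getContext());
    if (!libdevice)
        return false;
    // libdevice carries its own triple/data layout; align them with the
    // kernel module so the linker accepts the merge.
    libdevice->setTargetTriple(module.getTargetTriple());
    libdevice->setDataLayout(module.getDataLayout());
    // LinkOnlyNeeded pulls in only the __nv_* definitions actually
    // referenced; linkModules() returns true on error.
    return !llvm::Linker::linkModules(module, std::move(libdevice),
                                      llvm::Linker::Flags::LinkOnlyNeeded);
}

LinkOnlyNeeded matters here: libdevice defines hundreds of functions, and linking only the referenced ones keeps the kernel module small.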
src/jit/impl/mlir/compiler.cpp
@@ -10,6 +10,7 @@
  * implied.
  */
+#include "llvm/Pass.h"
 #include "megbrain_build_config.h"
 #if MGB_JIT && MGB_JIT_MLIR
@@ -21,6 +22,7 @@
 #include "megbrain/comp_node_env.h"
 #include "megbrain/jit/mlir/ir/dialect.h"
 #include "megbrain/jit/mlir/ir/passes.h"
+#include "megbrain/utils/timer.h"
 
 #include <mlir/Conversion/GPUCommon/GPUCommonPass.h>
 #include <mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h>
@@ -36,6 +38,11 @@
 #include <mlir/Transforms/Passes.h>
+#include <llvm/Support/TargetSelect.h>
+#include <llvm/IRReader/IRReader.h>
+#include <llvm/Linker/Linker.h>
+#include <dlfcn.h>
+#include <dirent.h>
 
 using namespace mgb;
 using namespace jit;
@@ -59,6 +66,61 @@ mlir::OwnedBlob compile_ptx_to_cubin(const std::string ptx, mlir::Location,
     return result;
 }
 
+std::unique_ptr<llvm::Module> translate_module_to_nvvm_ir_and_link_device(
+        Operation* m) {
+    std::unique_ptr<llvm::Module> module = mlir::translateModuleToNVVMIR(m);
+    auto get_device_path = []() -> std::string {
+        auto cuda_path = getenv("CUDA_BIN_PATH");
+        std::string device_dir;
+        if (!cuda_path) {
+            char cuda_lib_path[PATH_MAX];
+            auto handle = dlopen("libcudart.so", RTLD_GLOBAL | RTLD_LAZY);
+            mgb_assert(handle != nullptr, "%s", dlerror());
+            mgb_assert(dlinfo(handle, RTLD_DI_ORIGIN, &cuda_lib_path) != -1,
+                       "%s", dlerror());
+            device_dir =
+                    std::string(cuda_lib_path) + "/../../../nvvm/libdevice/";
+            mgb_assert(!dlclose(handle), "fail to dlclose handle");
+        } else {
+            device_dir = std::string(cuda_path) + "/nvvm/libdevice/";
+        }
+        DIR* dirp;
+        struct dirent* directory;
+        dirp = opendir(device_dir.c_str());
+        if (dirp) {
+            while ((directory = readdir(dirp)) != nullptr) {
+                if (!strncmp(directory->d_name, "libdevice", 9)) {
+                    closedir(dirp);
+                    return device_dir + std::string(directory->d_name);
+                }
+            }
+            closedir(dirp);
+        }
+        return {};
+    };
+
+    //! load libdevice.bc
+    llvm::SMDiagnostic err;
+    auto libdevice_path = get_device_path();
+    std::unique_ptr<llvm::Module> mlib = llvm::parseIRFile(
+            libdevice_path.c_str(), err, module->getContext());
+    if (mlib.get()) {
+        mlib->setTargetTriple(module->getTargetTriple());
+        mlib->setDataLayout(module->getDataLayout());
+        RealTimer timer;
+        mgb_assert(!llvm::Linker::linkModules(
+                           *module, std::move(mlib),
+                           llvm::Linker::Flags::LinkOnlyNeeded),
+                   "failed to parse ir file libdevice.bc");
+        mgb_log("MLIR JIT: link libdevice.bc, used: %.3fms",
+                timer.get_msecs());
+    } else {
+        mgb_log_warn("Fail to load bitcode file %s", libdevice_path.c_str());
+    }
+    return module;
+}
+
 #endif
 
 void add_cpu_lowering_pass(mlir::PassManager& manager) {
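The get_device_path lambda above tries two ways of finding libdevice: an explicit CUDA_BIN_PATH environment variable, and otherwise asking the dynamic loader where the already-resolvable libcudart.so lives (dlinfo with RTLD_DI_ORIGIN, a glibc extension) and walking from there to the toolkit's nvvm/libdevice/ directory. A standalone sketch of that loader trick (illustrative demo, not part of the commit):

// Standalone demo of the RTLD_DI_ORIGIN lookup (dlinfo is a glibc
// extension); illustrative only, not part of the commit.
#define _GNU_SOURCE
#include <dlfcn.h>
#include <limits.h>
#include <stdio.h>

int main() {
    void* handle = dlopen("libcudart.so", RTLD_GLOBAL | RTLD_LAZY);
    if (!handle) {
        fprintf(stderr, "dlopen: %s\n", dlerror());
        return 1;
    }
    char origin[PATH_MAX];  // filled with the directory holding libcudart.so
    if (dlinfo(handle, RTLD_DI_ORIGIN, origin) == -1) {
        fprintf(stderr, "dlinfo: %s\n", dlerror());
        dlclose(handle);
        return 1;
    }
    printf("libcudart.so dir: %s\n", origin);
    printf("libdevice guess:  %s/../../../nvvm/libdevice/\n", origin);
    dlclose(handle);
    return 0;
}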
@@ -80,7 +142,8 @@ void add_cpu_lowering_pass(mlir::PassManager& manager) {
 }
 
 #if MGB_CUDA
-void add_cuda_lowering_pass(mlir::PassManager& manager, CompNode cn) {
+void add_cuda_lowering_pass(mlir::PassManager& manager,
+                            const std::string& target_chip) {
     {
         mlir::OpPassManager& opt_pm = manager.nest<mlir::FuncOp>();
         opt_pm.addPass(mlir::createCanonicalizerPass());
@@ -99,12 +162,10 @@ void add_cuda_lowering_pass(mlir::PassManager& manager, CompNode cn) {
         auto& kernel_pm = manager.nest<gpu::GPUModuleOp>();
         kernel_pm.addPass(mlir::createLowerGpuOpsToNVVMOpsPass());
-        auto&& prop = CompNodeEnv::from_comp_node(cn).cuda_env().device_prop;
         kernel_pm.addPass(mlir::createConvertGPUKernelToBlobPass(
-                mlir::translateModuleToNVVMIR, compile_ptx_to_cubin,
-                "nvptx64-nvidia-cuda",
-                ssprintf("sm_%d%d", prop.major, prop.minor), "+ptx60",
-                MLIRCUDAExecutable::sm_blob_annotation));
+                translate_module_to_nvvm_ir_and_link_device,
+                compile_ptx_to_cubin, "nvptx64-nvidia-cuda", target_chip,
+                "+ptx60", MLIRCUDAExecutable::sm_blob_annotation));
     }
 }
 #endif
@@ -134,21 +195,29 @@ void MLIRCompiler::run_lowering_pass(mlir::OwningModuleRef& module,
                                      CompNode cn) {
     mgb_assert(cn.device_type() == m_device_type);
     mlir::PassManager manager(module->getContext());
+    std::string target_chip;
     switch (m_device_type) {
         case CompNode::DeviceType::CPU:
             add_cpu_lowering_pass(manager);
             break;
 #if MGB_CUDA
-        case CompNode::DeviceType::CUDA:
-            add_cuda_lowering_pass(manager, cn);
+        case CompNode::DeviceType::CUDA: {
+            auto&& prop =
+                    CompNodeEnv::from_comp_node(cn).cuda_env().device_prop;
+            std::string target_chip =
+                    ssprintf("sm_%d%d", prop.major, prop.minor);
+            add_cuda_lowering_pass(manager, target_chip);
             break;
+        }
 #endif
         default:
             mgb_throw(InternalError, "Unsupport device type: %d",
                       static_cast<int>(m_device_type));
             break;
     }
+    RealTimer timer;
     mgb_assert(mlir::succeeded(manager.run(*module)));
+    mgb_log("MLIR JIT: run lowering pass used: %.3f ms", timer.get_msecs());
 }
 
 std::unique_ptr<Executable> MLIRCompiler::do_compile(
src/jit/impl/mlir/ir/common.cpp
@@ -66,7 +66,6 @@ mlir::Value ValueBuilderHelper::const_val(float val) {
 }
 
 cb(neg, NegFOp);
-cb(abs, AbsFOp);
 cb(ceil, CeilFOp);
 cb(cos, CosOp);
 cb(exp, ExpOp);
@@ -79,6 +78,10 @@ cb(sqrt, SqrtOp);
 cb(tanh, TanhOp);
 #undef cb
 
+mlir::Value ValueBuilderHelper::abs(mlir::Value lhs) {
+    return max(lhs, const_val(0.f));
+}
+
 mlir::Value ValueBuilderHelper::floor(mlir::Value lhs) {
     //! FIXME use standard floor when upgrade llvm
     return neg(ceil(neg(lhs)));
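The floor helper above rests on the identity floor(x) = -ceil(-x), which holds for every finite x; per the FIXME it stands in until the LLVM upgrade brings a standard floor lowering. A host-side sanity check of the identity (plain C++, illustrative only):

// Host-side check of floor(x) == -ceil(-x); illustrative only.
#include <cassert>
#include <cmath>

int main() {
    // Holds for negatives, positives, zero, and exact integers alike.
    for (double x : {-2.5, -2.0, -0.5, 0.0, 0.5, 2.0, 2.5}) {
        assert(std::floor(x) == -std::ceil(-x));
    }
    return 0;
}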
src/jit/impl/mlir/ir/lower_to_gpu_pass.cpp
@@ -266,9 +266,6 @@ public:
         target.addLegalDialect<gpu::GPUDialect>();
         target.addIllegalDialect<MgbDialect>();
 
-        patterns.insert<AddOpLowering, AssignOpLowering, ReturnOpLowering>(
-                &getContext(), &launch_op);
-
 #define cb(_op, _) _op##Lowering,
         patterns.insert<MLIR_MGB_FOREACH_ELEMWISE_MODE_UNARY(cb)
                         MLIR_MGB_FOREACH_ELEMWISE_MODE_BINARY(cb)
src/jit/test/codegen.cpp
@@ -137,7 +137,7 @@ void run_mlir(CompNode cn) {
          b = opr::Host2DeviceCopy::make(*graph, host_x1),
          c = opr::Host2DeviceCopy::make(*graph, host_x2);
 
-    auto y = a + b + c;
+    auto y = a + b * c;
 
     auto ig_gen =
             std::make_unique<InternalGraphGenerator>(y.node()->owner_opr());
@@ -273,6 +273,20 @@ TYPED_TEST(TestJITMlirUnaryElemwise, run) {
     run_mlir_mode<TypeParam, 1>(cn);
 }
 
+#define SKIP_MODE(_mode)                                     \
+    if (TypeParam::mode == opr::Elemwise::Mode::_mode) {     \
+        printf("skip\n");                                    \
+        return;                                              \
+    }
+
+TYPED_TEST(TestJITMlirUnaryElemwise, runGpu) {
+    REQUIRE_GPU(1);
+    auto cn = CompNode::load("gpu0");
+
+    SKIP_MODE(SIN);
+
+    run_mlir_mode<TypeParam, 1>(cn);
+}
+
 ///////////////////////// binary ///////////////////////////////
 // clang-format off
 #define FOREACH_BINARY_MODE(cb) \
@@ -319,6 +333,12 @@ TYPED_TEST(TestJITMlirBinaryElemwise, run) {
     run_mlir_mode<TypeParam, 2>(cn);
 }
 
+TYPED_TEST(TestJITMlirBinaryElemwise, runGpu) {
+    REQUIRE_GPU(1);
+    auto cn = CompNode::load("gpu0");
+    run_mlir_mode<TypeParam, 2>(cn);
+}
+
 ///////////////////////// ternary ///////////////////////////////
 // clang-format off
 #define FOREACH_TERNARY_MODE(cb) \
@@ -345,6 +365,14 @@ TYPED_TEST(TestJITMlirTernaryElemwise, run) {
     run_mlir_mode<TypeParam, 3>(cn);
 }
 
+TYPED_TEST(TestJITMlirTernaryElemwise, runGpu) {
+    REQUIRE_GPU(1);
+    auto cn = CompNode::load("gpu0");
+    run_mlir_mode<TypeParam, 3>(cn);
+}
+
+#undef SKIP_MODE
+
 #endif
 
 #endif  // MGB_JIT
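Usage note: with a standard googletest runner, the new cases can be selected via the stock --gtest_filter flag, e.g. --gtest_filter='*Elemwise*runGpu*' (the test binary's name depends on the build). REQUIRE_GPU(1) gates each case on GPU availability, and SKIP_MODE(SIN) additionally skips the unary SIN mode on GPU.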