Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
18ec5341
MegEngine
项目概览
MegEngine 天元
/
MegEngine
大约 1 年 前同步成功
通知
396
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
18ec5341
编写于
11月 10, 2020
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor(dnn): remove unused costmodel in cuda
GitOrigin-RevId: b15f0607b91b138c593d55b847bcc71b3ffee613
上级
e39f9386
变更
4
展开全部
隐藏空白更改
内联
并排
Showing
4 changed files
with
0 additions
and
1076 deletions
+0
-1076
dnn/src/cuda/convolution/cudnn_heuristic.cpp
dnn/src/cuda/convolution/cudnn_heuristic.cpp
+0
-235
dnn/src/cuda/convolution/cudnn_heuristic.h
dnn/src/cuda/convolution/cudnn_heuristic.h
+0
-86
dnn/src/cuda/convolution/get_params.cpp
dnn/src/cuda/convolution/get_params.cpp
+0
-754
dnn/src/x86/conv_bias/f32/winograd_algo.cpp
dnn/src/x86/conv_bias/f32/winograd_algo.cpp
+0
-1
未找到文件。
dnn/src/cuda/convolution/cudnn_heuristic.cpp
已删除
100644 → 0
浏览文件 @
e39f9386
/**
* \file dnn/src/cuda/convolution/cudnn_heuristic.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "./cudnn_heuristic.h"
#include "megdnn.h"
#include "src/cuda/utils.h"
// Pull the relevant namespaces into scope for this translation unit.
using namespace megdnn;
using namespace cuda;
using namespace convolution;
/*!
 * \brief whether the argument combination is covered by the performance model
 *
 * The model only handles fp32 NCHW convolutions with 2-D spatial dims,
 * unit stride, unit dilation and no filter flipping; everything else is
 * rejected so the caller falls back to another selection strategy.
 */
bool convolution::PerformanceModelBase::args_is_proper(
        const TensorLayout* x_layout,
        const ConvolutionBase<param::Convolution>::CanonizedFilterMeta& filter) {
    if (x_layout->dtype != dtype::Float32())
        return false;
    if (filter.format != param::Convolution::Format::NCHW)
        return false;
    if (filter.should_flip != 0)
        return false;
    if (filter.stride[0] != 1 || filter.stride[1] != 1)
        return false;
    if (filter.spatial_ndim != 2)
        return false;
    return filter.dilation[0] == 1 && filter.dilation[1] == 1;
}
/*!
 * \brief run the pre-trained MLP cost model for the given conv problem
 *
 * Returns false when the arguments are outside the model's training
 * domain or no parameter set exists for the current compute capability.
 * On success, *time_pred_p holds the predicted time per cuDNN algorithm
 * and *output_dim_p the number of predicted entries.
 */
bool convolution::PerformanceModelBase::predict_time_success(
        const TensorLayout* x_layout,
        const ConvolutionBase<param::Convolution>::CanonizedFilterMeta& filter,
        const ConvolutionType& conv_type, float** mask_p, float** time_pred_p,
        size_t* output_dim_p) {
    // Reject argument combinations the model was not trained on.
    if (!args_is_proper(x_layout, filter)) {
        return false;
    }

    size_t layer_num;
    const size_t* layers_dim;
    const float* matrices;
    const float* biases;
    const float* alpha;
    const float* beta;
    float* hidden_units;

    // Fetch the pre-trained network parameters for this GPU architecture;
    // bail out if no parameter set matches the device's compute capability.
    if (!convolution::heuristic_params_available(
                cuda::current_device_prop().major,
                cuda::current_device_prop().minor, &layer_num, &layers_dim,
                &matrices, &biases, &alpha, &beta, conv_type, &hidden_units,
                time_pred_p, mask_p)) {
        return false;
    }

    // Pack the 9 model inputs: N, C, H, W, OC, FH, FW, PH, PW.
    size_t input_params[9];
    for (size_t k = 0; k < 4; ++k) {
        input_params[k] = x_layout->shape[k];
    }
    input_params[4] = filter.ocpg;
    input_params[5] = filter.spatial[0];
    input_params[6] = filter.spatial[1];
    input_params[7] = filter.padding[0];
    input_params[8] = filter.padding[1];

    predict_time(layer_num, layers_dim, input_params, matrices, biases, alpha,
                 beta, hidden_units, *time_pred_p);
    *output_dim_p = layers_dim[layer_num - 1];
    return true;
}
/*!
 * \brief forward pass of the pre-trained MLP cost model
 *
 * \param layer_num     number of layers (including input and output layer)
 * \param layers_dim    per-layer unit counts, layers_dim[0] == 9 inputs
 * \param input_params  the 9 packed conv parameters (N,C,H,W,OC,FH,FW,PH,PW)
 * \param matrices      row-major weight matrices of all layers, concatenated
 * \param biases        bias vectors of all layers, concatenated
 * \param alpha,beta    per-output calibration factors for the final scaling
 * \param hidden_units  scratch buffer for hidden activations
 * \param time_pred     output buffer, one predicted time per cuDNN algorithm
 *
 * NOTE(review): the accumulation uses +=, so hidden_units and time_pred are
 * assumed to be zero-initialized by heuristic_params_available — confirm.
 */
void convolution::PerformanceModelBase::predict_time(
        const size_t layer_num, const size_t* layers_dim,
        const size_t* input_params, const float* matrices, const float* biases,
        const float* alpha, const float* beta, float* hidden_units,
        float* time_pred) {
    size_t layer_ind;
    size_t i, j;
    const float* matrix_entry = matrices;
    const float* bias_entry = biases;
    float* prev_entry;
    float* next_entry = hidden_units;
    size_t shape;

    // First hidden layer: consumes the raw input parameters.
    for (j = 0; j < layers_dim[1]; ++j) {
        for (i = 0; i < layers_dim[0]; ++i) {
            next_entry[j] +=
                    matrix_entry[j * layers_dim[0] + i] * input_params[i];
        }
        next_entry[j] += bias_entry[j];
        next_entry[j] = element_ReLU(next_entry[j]);
    }
    prev_entry = next_entry;
    next_entry += layers_dim[1];
    matrix_entry += layers_dim[0] * layers_dim[1];
    bias_entry += layers_dim[1];

    // Middle hidden layers: consume the previous layer's activations.
    for (layer_ind = 1; layer_ind < layer_num - 2; ++layer_ind) {
        for (j = 0; j < layers_dim[layer_ind + 1]; ++j) {
            for (i = 0; i < layers_dim[layer_ind]; ++i) {
                next_entry[j] += matrix_entry[j * layers_dim[layer_ind] + i] *
                                 prev_entry[i];
            }
            next_entry[j] += bias_entry[j];
            next_entry[j] = element_ReLU(next_entry[j]);
        }
        prev_entry = next_entry;
        next_entry += layers_dim[layer_ind + 1];
        matrix_entry += layers_dim[layer_ind] * layers_dim[layer_ind + 1];
        bias_entry += layers_dim[layer_ind + 1];
    }

    // Output layer (linear, no ReLU).
    // FIX: the original iterated j over the *input* dim and i over the
    // *output* dim, multiplied by input_params[i] instead of the last hidden
    // activations, and indexed the weights as matrix_entry[j * layers_dim[i]]
    // (stride taken from layers_dim[i], missing "+ i"). Rewritten to match
    // the hidden-layer pattern and the final scaling loop below.
    for (j = 0; j < layers_dim[layer_num - 1]; ++j) {
        for (i = 0; i < layers_dim[layer_num - 2]; ++i) {
            time_pred[j] += matrix_entry[j * layers_dim[layer_num - 2] + i] *
                            prev_entry[i];
        }
        time_pred[j] += bias_entry[j];
    }

    // Work-size proxy: N * C * OC * OH * OW * FH * FW for unit stride
    // (args_is_proper guarantees stride == dilation == 1).
    shape = input_params[0] * input_params[1] * input_params[4] *
            (input_params[2] + input_params[7] * 2 - input_params[5] + 1) *
            (input_params[3] + input_params[8] * 2 - input_params[6] + 1) *
            input_params[5] * input_params[6];

    // Undo the training-time normalization: the net predicts log2 of the
    // normalized time, so rescale each output by beta/alpha and the work size.
    for (i = 0; i < layers_dim[layer_num - 1]; ++i) {
        time_pred[i] = std::exp2f(time_pred[i] * beta[i]) * (shape / alpha[i]);
    }
}
/* backward filter */
/*!
 * \brief mark which cuDNN backward-filter algorithms are usable
 *
 * For each candidate algorithm index, mask[i] is set to 1.0f when the
 * workspace query succeeds and the required workspace fits strictly under
 * the limit, and to -1.0f otherwise.
 */
void convolution::PerformanceModelBackwardFilter::gen_mask_backward_filter(
        float* mask, const size_t output_dim,
        const ConvolutionBackwardFilterImpl::AlgoBase::SizeArgs& args,
        const CUDNNBwdFilterDescs& D,
        const size_t workspace_size_limit_in_bytes) {
    for (size_t algo_idx = 0; algo_idx < output_dim; ++algo_idx) {
        size_t workspace_size = 0;
        auto status = cudnnGetConvolutionBackwardFilterWorkspaceSize(
                args.handle->cudnn_handle(), D.src_desc.desc, D.diff_desc.desc,
                D.conv_desc.desc, D.grad_desc.desc,
                static_cast<cudnnConvolutionBwdFilterAlgo_t>(algo_idx),
                &workspace_size);
        bool usable = status == CUDNN_STATUS_SUCCESS &&
                      workspace_size < workspace_size_limit_in_bytes;
        mask[algo_idx] = usable ? 1.0f : -1.0f;
    }
}
/*!
 * \brief pick the fastest usable cuDNN backward-filter algorithm
 *
 * Runs the cost model, masks out algorithms whose workspace exceeds the
 * limit, and selects the usable algorithm with the smallest predicted time.
 * \return true iff a prediction was made and a usable algorithm exists.
 */
bool convolution::PerformanceModelBackwardFilter::
        get_algo_backward_filter_success(
                const ConvolutionBackwardFilterImpl::AlgoBase::SizeArgs& args,
                const CUDNNBwdFilterDescs& D,
                const size_t workspace_limit_in_bytes,
                cudnnConvolutionBwdFilterAlgo_t* algo) {
    float* mask;
    size_t output_dim;
    float* time_pred;
    if (!predict_time_success(
                args.src_layout, args.grad_filter_meta,
                ConvolutionType::BACKWARD_FILTER, &mask, &time_pred,
                &output_dim)) {
        return false;
    }
    gen_mask_backward_filter(mask, output_dim, args, D,
                             workspace_limit_in_bytes);

    // FIX: the original initialized selected = 0 and required
    // time_pred[i] < time_pred[selected]; when algorithm 0 was unusable but
    // had the smallest predicted time, no usable algorithm could ever be
    // chosen and the function wrongly returned false. Track whether a usable
    // candidate has been found and take the first one unconditionally.
    size_t selected = 0;
    bool found = false;
    for (size_t i = 0; i < output_dim; ++i) {
        if (mask[i] > 0 && (!found || time_pred[i] < time_pred[selected])) {
            selected = i;
            found = true;
        }
    }
    *algo = static_cast<cudnnConvolutionBwdFilterAlgo_t>(selected);
    return found;
}
/* backward data */
/*!
 * \brief mark which cuDNN backward-data algorithms are usable
 *
 * For each candidate algorithm index, mask[i] is set to 1.0f when the
 * workspace query succeeds and the required workspace fits strictly under
 * the limit, and to -1.0f otherwise.
 */
void convolution::PerformanceModelBackwardData::gen_mask_backward_data(
        float* mask, const size_t output_dim,
        const ConvolutionBackwardDataImpl::AlgoBase::SizeArgs& args,
        const CUDNNBwdDataDescs& D,
        const size_t workspace_size_limit_in_bytes) {
    for (size_t algo_idx = 0; algo_idx < output_dim; ++algo_idx) {
        size_t workspace_size = 0;
        auto status = cudnnGetConvolutionBackwardDataWorkspaceSize(
                args.handle->cudnn_handle(), D.filter_desc.desc,
                D.diff_desc.desc, D.conv_desc.desc, D.grad_desc.desc,
                static_cast<cudnnConvolutionBwdDataAlgo_t>(algo_idx),
                &workspace_size);
        bool usable = status == CUDNN_STATUS_SUCCESS &&
                      workspace_size < workspace_size_limit_in_bytes;
        mask[algo_idx] = usable ? 1.0f : -1.0f;
    }
}
/*!
 * \brief pick the fastest usable cuDNN backward-data algorithm
 *
 * Runs the cost model, masks out algorithms whose workspace exceeds the
 * limit, and selects the usable algorithm with the smallest predicted time,
 * with a special-case fallback to algo 0 for very asymmetric filters.
 * \return true iff a prediction was made and the chosen algorithm is usable.
 */
bool convolution::PerformanceModelBackwardData::get_algo_backward_data_success(
        const ConvolutionBackwardDataImpl::AlgoBase::SizeArgs& args,
        const CUDNNBwdDataDescs& D, const size_t workspace_limit_in_bytes,
        cudnnConvolutionBwdDataAlgo_t* algo) {
    float* mask;
    size_t output_dim;
    float* time_pred;
    if (!predict_time_success(
                args.grad_layout, args.filter_meta,
                ConvolutionType::BACKWARD_DATA, &mask, &time_pred,
                &output_dim)) {
        return false;
    }
    gen_mask_backward_data(mask, output_dim, args, D,
                           workspace_limit_in_bytes);

    // FIX: the original initialized selected = 0 and required
    // time_pred[i] < time_pred[selected]; when algorithm 0 was unusable but
    // had the smallest predicted time, no usable algorithm could ever be
    // chosen. Track whether a usable candidate has been found and take the
    // first one unconditionally.
    size_t selected = 0;
    bool found = false;
    for (size_t i = 0; i < output_dim; ++i) {
        if (mask[i] > 0 && (!found || time_pred[i] < time_pred[selected])) {
            selected = i;
            found = true;
        }
    }

    // special case:
    // if the filter shape in cudnnConvolutionBackwardData is too asymmetric,
    // the performance of algo1 is dramatically reduced,
    // we temporarily choose algo0.
    // NOTE(review): integer division here assumes both spatial dims are
    // nonzero — confirm upstream validation guarantees this.
    if (args.filter_meta.spatial[0] / args.filter_meta.spatial[1] > 32 ||
        args.filter_meta.spatial[1] / args.filter_meta.spatial[0] > 32) {
        selected = 0;
    }
    *algo = static_cast<cudnnConvolutionBwdDataAlgo_t>(selected);
    // The asymmetry override may have forced an unusable algo 0, so check
    // the mask of the final choice rather than `found`.
    return mask[selected] > 0;
}
dnn/src/cuda/convolution/cudnn_heuristic.h
已删除
100644 → 0
浏览文件 @
e39f9386
/**
* \file dnn/src/cuda/convolution/cudnn_heuristic.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include "src/cuda/convolution/backward_data/algo.h"
#include "src/cuda/convolution/backward_filter/algo.h"
namespace
megdnn
{
namespace
cuda
{
namespace
convolution
{
//! which convolution pass a performance-model query refers to
enum class ConvolutionType {
    FORWARD = 0,
    BACKWARD_FILTER = 1,
    BACKWARD_DATA = 2
};
/*!
 * \brief look up the pre-trained cost-model parameters for a GPU arch
 *
 * On success fills the layer layout, weight/bias matrices, calibration
 * factors and scratch/output buffers for the network that matches the
 * given compute capability and convolution type.
 * \return false when no parameter set exists for this device.
 */
bool heuristic_params_available(
        int cuda_major, int cuda_minor, size_t* layer_num_p,
        const size_t** layers_dim_p, const float** matrices_p,
        const float** biases_p, const float** alpha_p, const float** beta_p,
        const ConvolutionType& conv_type, float** hidden_units_p,
        float** time_pred_p, float** mask_p);
/*!
 * \brief shared machinery of the MLP-based cuDNN algorithm cost model
 *
 * Derived classes provide the per-direction (backward filter / backward
 * data) selection entry points; this base runs the network forward pass.
 */
class PerformanceModelBase {
public:
    //! ReLU activation for a single unit.
    //! FIX: use float literals; the original compared against and returned
    //! double 0.0, forcing a float->double->float round trip (and a
    //! narrowing conversion of the ternary result) on every call.
    static float element_ReLU(float element) {
        return element > 0.f ? element : 0.f;
    }

    //! Run the cost model; see the .cpp for the full contract.
    //! \return false if the args are unsupported or no trained params exist.
    static bool predict_time_success(
            const TensorLayout* x_layout,
            const ConvolutionBase<param::Convolution>::CanonizedFilterMeta&
                    filter,
            const ConvolutionType& conv_type, float** mask_p,
            float** time_pred_p, size_t* output_dim_p);

private:
    //! whether the argument combination lies in the model's training domain
    static bool args_is_proper(
            const TensorLayout* x_layout,
            const ConvolutionBase<param::Convolution>::CanonizedFilterMeta&
                    filter);

    //! forward pass of the trained network
    static void predict_time(
            const size_t layer_num, const size_t* layers_dim,
            const size_t* input_params, const float* matrices,
            const float* biases, const float* alpha, const float* beta,
            float* hidden_units, float* time_pred);
};
//! cost-model-driven algorithm selection for cuDNN backward filter
class PerformanceModelBackwardFilter : public PerformanceModelBase {
public:
    //! pick the fastest usable backward-filter algorithm
    //! \return true iff a prediction was made and a usable algo was found
    static bool get_algo_backward_filter_success(
            const ConvolutionBackwardFilterImpl::AlgoBase::SizeArgs& args,
            const CUDNNBwdFilterDescs& D, const size_t workspace_limit_in_bytes,
            cudnnConvolutionBwdFilterAlgo_t* algo);

private:
    //! set mask[i] to +1/-1 depending on workspace fit of algorithm i
    static void gen_mask_backward_filter(
            float* mask, const size_t output_dim,
            const ConvolutionBackwardFilterImpl::AlgoBase::SizeArgs& args,
            const CUDNNBwdFilterDescs& D,
            const size_t workspace_limit_in_bytes);
};
//! cost-model-driven algorithm selection for cuDNN backward data
class PerformanceModelBackwardData : public PerformanceModelBase {
public:
    //! pick the fastest usable backward-data algorithm
    //! \return true iff a prediction was made and the chosen algo is usable
    static bool get_algo_backward_data_success(
            const ConvolutionBackwardDataImpl::AlgoBase::SizeArgs& args,
            const CUDNNBwdDataDescs& D, const size_t workspace_limit_in_bytes,
            cudnnConvolutionBwdDataAlgo_t* algo);

private:
    //! set mask[i] to +1/-1 depending on workspace fit of algorithm i
    static void gen_mask_backward_data(
            float* mask, const size_t output_dim,
            const ConvolutionBackwardDataImpl::AlgoBase::SizeArgs& args,
            const CUDNNBwdDataDescs& D, const size_t workspace_limit_in_bytes);
};
}
// namespace convolution
}
// namespace cuda
}
// namespace megdnn
dnn/src/cuda/convolution/get_params.cpp
已删除
100644 → 0
浏览文件 @
e39f9386
此差异已折叠。
点击以展开。
dnn/src/x86/conv_bias/f32/winograd_algo.cpp
浏览文件 @
18ec5341
...
@@ -16,7 +16,6 @@
...
@@ -16,7 +16,6 @@
#include "src/x86/conv_bias/opr_impl.h"
#include "src/x86/conv_bias/opr_impl.h"
#include "src/x86/conv_bias/postprocess_helper.h"
#include "src/x86/conv_bias/postprocess_helper.h"
#include "src/x86/handle.h"
#include "src/x86/handle.h"
#include "src/x86/profile.h"
#include "midout.h"
#include "midout.h"
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录