Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
0d17c047
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
0d17c047
编写于
6月 24, 2022
作者:
Z
zhaoying9105
提交者:
GitHub
6月 24, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[MLU](bugfix): fix MLUCnnl::ScatterFunctor function declare bug (#43778)
上级
03972d5a
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
1061 addition
and
554 deletion
+1061
-554
paddle/fluid/operators/mlu/mlu_baseop.h
paddle/fluid/operators/mlu/mlu_baseop.h
+1030
-541
paddle/fluid/operators/scatter_op_mlu.cc
paddle/fluid/operators/scatter_op_mlu.cc
+31
-13
未找到文件。
paddle/fluid/operators/mlu/mlu_baseop.h
浏览文件 @
0d17c047
...
...
@@ -35,9 +35,12 @@ using DeviceContextPool = platform::DeviceContextPool;
using
MLUDeviceContext
=
platform
::
MLUDeviceContext
;
const
std
::
map
<
std
::
string
,
cnnlReduceOp_t
>
MLUReduceOpMap
=
{
{
"reduce_all"
,
CNNL_REDUCE_AND
},
{
"reduce_any"
,
CNNL_REDUCE_OR
},
{
"reduce_max"
,
CNNL_REDUCE_MAX
},
{
"reduce_mean"
,
CNNL_REDUCE_AVG
},
{
"reduce_min"
,
CNNL_REDUCE_MIN
},
{
"reduce_sum"
,
CNNL_REDUCE_ADD
},
{
"reduce_all"
,
CNNL_REDUCE_AND
},
{
"reduce_any"
,
CNNL_REDUCE_OR
},
{
"reduce_max"
,
CNNL_REDUCE_MAX
},
{
"reduce_mean"
,
CNNL_REDUCE_AVG
},
{
"reduce_min"
,
CNNL_REDUCE_MIN
},
{
"reduce_sum"
,
CNNL_REDUCE_ADD
},
{
"reduce_prod"
,
CNNL_REDUCE_MUL
},
};
...
...
@@ -225,36 +228,49 @@ class MLUCnnlTensorDesc {
MLUCnnlTensorDesc
&
operator
=
(
MLUCnnlTensorDesc
&&
rhs
);
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int
dim_sizes
[],
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int
dim_sizes
[],
const
cnnlDataType_t
tensor_dtype
);
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int
dim_sizes
[],
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int
dim_sizes
[],
const
cnnlDataType_t
tensor_dtype
,
const
cnnlTensorLayout_t
layout
);
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int
dim_sizes
[],
const
cnnlDataType_t
tensor_dtype
,
int
position
);
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int
dim_sizes
[],
const
cnnlDataType_t
tensor_dtype
,
int
position
);
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int64_t
dim_sizes
[],
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int64_t
dim_sizes
[],
const
cnnlDataType_t
tensor_dtype
);
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int64_t
dim_sizes
[],
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int64_t
dim_sizes
[],
const
cnnlDataType_t
tensor_dtype
,
const
cnnlTensorLayout_t
layout
);
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int64_t
dim_sizes
[],
const
cnnlDataType_t
tensor_dtype
,
int
position
);
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int64_t
dim_sizes
[],
const
cnnlDataType_t
tensor_dtype
,
int
position
);
MLUCnnlTensorDesc
(
const
Tensor
&
tensor
,
const
cnnlTensorLayout_t
layout
,
MLUCnnlTensorDesc
(
const
Tensor
&
tensor
,
const
cnnlTensorLayout_t
layout
,
const
cnnlDataType_t
tensor_dtype
);
explicit
MLUCnnlTensorDesc
(
const
Tensor
&
tensor
);
MLUCnnlTensorDesc
(
const
Tensor
&
tensor
,
cnnlTensorLayout_t
layout
,
const
cnnlDataType_t
tensor_dtype
,
int
position
);
MLUCnnlTensorDesc
(
const
Tensor
&
tensor
,
cnnlTensorLayout_t
layout
,
const
cnnlDataType_t
tensor_dtype
,
int
position
);
MLUCnnlTensorDesc
(
const
Tensor
&
tensor
,
cnnlTensorLayout_t
layout
,
const
cnnlDataType_t
tensor_dtype
,
int
position
,
MLUCnnlTensorDesc
(
const
Tensor
&
tensor
,
cnnlTensorLayout_t
layout
,
const
cnnlDataType_t
tensor_dtype
,
int
position
,
float
scale
);
~
MLUCnnlTensorDesc
();
...
...
@@ -270,8 +286,10 @@ class MLUCnnlActivationDesc {
MLUCnnlActivationDesc
(
const
MLUCnnlActivationDesc
&
desc
)
=
delete
;
MLUCnnlActivationDesc
&
operator
=
(
const
MLUCnnlActivationDesc
&
desc
)
=
delete
;
MLUCnnlActivationDesc
(
const
cnnlActivationMode_t
act_mode
,
const
float
ceof
);
MLUCnnlActivationDesc
(
const
cnnlActivationMode_t
act_mode
,
const
float
ceof
,
const
float
sliced_dim
,
const
float
selu_alpha
,
MLUCnnlActivationDesc
(
const
cnnlActivationMode_t
act_mode
,
const
float
ceof
,
const
float
sliced_dim
,
const
float
selu_alpha
,
const
float
selu_lambda
);
const
cnnlActivationDescriptor_t
get
()
const
;
...
...
@@ -288,14 +306,22 @@ class MLUCnnlPoolingDesc {
MLUCnnlPoolingDesc
(
const
cnnlPoolingMode_t
mode
,
const
cnnlNanPropagation_t
maxpooling_nan_opt
,
int
window_rows
,
int
window_cols
,
int64_t
pad_up
,
int64_t
pad_down
,
int64_t
pad_left
,
int64_t
pad_right
,
int
row_stride
,
int
col_stride
,
int
row_dilation
,
int
col_dilation
,
bool
ceil_mode
);
int
window_rows
,
int
window_cols
,
int64_t
pad_up
,
int64_t
pad_down
,
int64_t
pad_left
,
int64_t
pad_right
,
int
row_stride
,
int
col_stride
,
int
row_dilation
,
int
col_dilation
,
bool
ceil_mode
);
MLUCnnlPoolingDesc
(
const
cnnlPoolingMode_t
mode
,
const
cnnlNanPropagation_t
maxpooling_nan_opt
,
const
int
tensor_rank
,
const
std
::
vector
<
int
>&
window
,
const
int
tensor_rank
,
const
std
::
vector
<
int
>&
window
,
const
std
::
vector
<
int
>&
padding
,
const
std
::
vector
<
int
>&
stride
);
...
...
@@ -364,8 +390,10 @@ class MLUCnnlNMSDesc {
MLUCnnlNMSDesc
(
const
MLUCnnlNMSDesc
&
desc
)
=
delete
;
MLUCnnlNMSDesc
&
operator
=
(
const
MLUCnnlNMSDesc
&
desc
)
=
delete
;
MLUCnnlNMSDesc
(
const
cnnlNmsOutputMode_t
mode
,
const
float
iou_threshold
,
const
int
max_output_size
,
const
float
confidence_threshold
,
MLUCnnlNMSDesc
(
const
cnnlNmsOutputMode_t
mode
,
const
float
iou_threshold
,
const
int
max_output_size
,
const
float
confidence_threshold
,
const
int
input_layout
);
const
cnnlNmsDescriptor_t
get
()
const
;
...
...
@@ -378,12 +406,17 @@ class MLUCnnlNMSDesc {
class
MLUCnnlConvolutionDesc
{
public:
MLUCnnlConvolutionDesc
(
const
int
dims
,
const
int
pad
[],
const
int
stride
[],
const
int
dilation
[],
const
int
group_count
,
MLUCnnlConvolutionDesc
(
const
int
dims
,
const
int
pad
[],
const
int
stride
[],
const
int
dilation
[],
const
int
group_count
,
const
cnnlDataType_t
tensor_dtype
);
MLUCnnlConvolutionDesc
(
const
int
dims
,
const
int64_t
pad
[],
const
int64_t
stride
[],
const
int64_t
dilation
[],
MLUCnnlConvolutionDesc
(
const
int
dims
,
const
int64_t
pad
[],
const
int64_t
stride
[],
const
int64_t
dilation
[],
const
int
group_count
,
const
cnnlDataType_t
tensor_dtype
);
...
...
@@ -402,7 +435,8 @@ class MLUCnnlConvolutionDesc {
class
MLUCnnlBatchSpaceDesc
{
public:
MLUCnnlBatchSpaceDesc
(
uint32_t
block_shape
[],
uint32_t
paddings
[],
MLUCnnlBatchSpaceDesc
(
uint32_t
block_shape
[],
uint32_t
paddings
[],
const
uint32_t
block_shape_size
,
const
uint32_t
paddings_size
);
...
...
@@ -446,8 +480,12 @@ class MLUCnnlTrigonDesc {
class
MLUCnnlDCNDesc
{
public:
MLUCnnlDCNDesc
(
int
dimNb
,
const
int
*
pad
,
const
int
*
stride
,
const
int
*
dilation
,
int
deformable_group
,
int
conv_group
,
MLUCnnlDCNDesc
(
int
dimNb
,
const
int
*
pad
,
const
int
*
stride
,
const
int
*
dilation
,
int
deformable_group
,
int
conv_group
,
int
im2col_step
);
const
cnnlDCNDescriptor_t
get
()
const
;
...
...
@@ -461,55 +499,88 @@ class MLUCnnl {
public:
static
void
Active
(
const
ExecutionContext
&
ctx
,
cnnlActivationDescriptor_t
active_desc
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
ActiveGrad
(
const
ExecutionContext
&
ctx
,
cnnlActivationDescriptor_t
active_desc
,
const
void
*
alpha
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
y_desc
,
const
void
*
y
,
const
cnnlTensorDescriptor_t
diff_y_desc
,
const
void
*
diff_y
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
void
*
diff_x
);
static
void
Concat
(
const
ExecutionContext
&
ctx
,
const
int
pack_num
,
const
int
axis
,
const
cnnlTensorDescriptor_t
inputs_desc
[],
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
ActiveGrad
(
const
ExecutionContext
&
ctx
,
cnnlActivationDescriptor_t
active_desc
,
const
void
*
alpha
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
y_desc
,
const
void
*
y
,
const
cnnlTensorDescriptor_t
diff_y_desc
,
const
void
*
diff_y
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
void
*
diff_x
);
static
void
Concat
(
const
ExecutionContext
&
ctx
,
const
int
pack_num
,
const
int
axis
,
const
cnnlTensorDescriptor_t
inputs_desc
[],
const
void
*
const
inputs
[],
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Concat
(
const
MLUDeviceContext
&
dev_ctx
,
const
int
pack_num
,
const
int
axis
,
const
cnnlTensorDescriptor_t
inputs_desc
[],
static
void
Concat
(
const
MLUDeviceContext
&
dev_ctx
,
const
int
pack_num
,
const
int
axis
,
const
cnnlTensorDescriptor_t
inputs_desc
[],
const
void
*
const
inputs
[],
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Cast
(
const
ExecutionContext
&
ctx
,
cnnlCastDataType_t
cast_type
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Cast
(
const
ExecutionContext
&
ctx
,
cnnlCastDataType_t
cast_type
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Clip
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
void
*
min
,
const
void
*
max
,
void
*
y
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
void
*
min
,
const
void
*
max
,
void
*
y
);
static
void
HardtanhBackward
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
diff_y_desc
,
const
void
*
diff_y
,
const
float
max_val
,
const
float
min_val
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
void
*
diff_x
);
static
void
HardtanhBackward
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
diff_y_desc
,
const
void
*
diff_y
,
const
float
max_val
,
const
float
min_val
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
void
*
diff_x
);
static
void
Div
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
const
cnnlTensorDescriptor_t
in0_desc
,
const
void
*
in0
,
const
cnnlTensorDescriptor_t
in1_desc
,
const
void
*
in1
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
in0_desc
,
const
void
*
in0
,
const
cnnlTensorDescriptor_t
in1_desc
,
const
void
*
in1
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Fill
(
const
ExecutionContext
&
ctx
,
const
cnnlPointerMode_t
pointer_mode
,
const
void
*
value_ptr
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
LRN
(
const
ExecutionContext
&
ctx
,
const
int
local_size
,
const
double
alpha
,
const
double
beta
,
const
double
k
,
const
cnnlPointerMode_t
pointer_mode
,
const
void
*
value_ptr
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
LRN
(
const
ExecutionContext
&
ctx
,
const
int
local_size
,
const
double
alpha
,
const
double
beta
,
const
double
k
,
const
cnnlTensorDescriptor_t
input_quant_desc
,
const
void
*
input_quant
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
QuantifyOffline
(
const
ExecutionContext
&
context
,
cnnlQuantizeMode_t
mode
,
...
...
@@ -521,98 +592,158 @@ class MLUCnnl {
static
void
QuantifyOnline
(
const
ExecutionContext
&
context
,
const
int
bitwidth
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
bool
compute_scale
,
void
*
position
,
void
*
scale
,
const
void
*
input
,
const
bool
compute_scale
,
void
*
position
,
void
*
scale
,
const
cnnlTensorDescriptor_t
ouput_desc
,
void
*
output
);
static
void
SGD
(
const
ExecutionContext
&
context
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
void
*
lr
,
const
cnnlTensorDescriptor_t
var_desc
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
void
*
lr
,
const
cnnlTensorDescriptor_t
var_desc
,
void
*
var
);
static
void
ApplyAdaGrad
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
cnnlTensorDescriptor_t
accum_desc
,
void
*
accum
,
const
cnnlTensorDescriptor_t
var_desc
,
void
*
var
,
const
void
*
lr
,
const
bool
update_slots
);
const
cnnlTensorDescriptor_t
accum_desc
,
void
*
accum
,
const
cnnlTensorDescriptor_t
var_desc
,
void
*
var
,
const
void
*
lr
,
const
bool
update_slots
);
static
void
ApplyRMSProp
(
const
ExecutionContext
&
context
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
void
*
lr
,
const
void
*
rho
,
const
void
*
momentum
,
const
void
*
epsilon
,
const
cnnlTensorDescriptor_t
var_desc
,
void
*
var
,
const
cnnlTensorDescriptor_t
ms_desc
,
void
*
ms
,
const
cnnlTensorDescriptor_t
mom_desc
,
void
*
mom
);
static
void
ApplyCenterRMSProp
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
void
*
lr
,
const
void
*
rho
,
const
void
*
momentum
,
const
void
*
epsilon
,
const
cnnlTensorDescriptor_t
var_desc
,
void
*
var
,
const
cnnlTensorDescriptor_t
mg_desc
,
void
*
mg
,
const
cnnlTensorDescriptor_t
ms_desc
,
void
*
ms
,
const
cnnlTensorDescriptor_t
mom_desc
,
void
*
mom
);
const
void
*
grad
,
const
void
*
lr
,
const
void
*
rho
,
const
void
*
momentum
,
const
void
*
epsilon
,
const
cnnlTensorDescriptor_t
var_desc
,
void
*
var
,
const
cnnlTensorDescriptor_t
ms_desc
,
void
*
ms
,
const
cnnlTensorDescriptor_t
mom_desc
,
void
*
mom
);
static
void
ApplyCenterRMSProp
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
void
*
lr
,
const
void
*
rho
,
const
void
*
momentum
,
const
void
*
epsilon
,
const
cnnlTensorDescriptor_t
var_desc
,
void
*
var
,
const
cnnlTensorDescriptor_t
mg_desc
,
void
*
mg
,
const
cnnlTensorDescriptor_t
ms_desc
,
void
*
ms
,
const
cnnlTensorDescriptor_t
mom_desc
,
void
*
mom
);
static
void
ApplyAdam
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
var_desc
,
void
*
var
,
const
cnnlTensorDescriptor_t
m_desc
,
void
*
m
,
const
cnnlTensorDescriptor_t
v_desc
,
void
*
v
,
const
cnnlTensorDescriptor_t
var_desc
,
void
*
var
,
const
cnnlTensorDescriptor_t
m_desc
,
void
*
m
,
const
cnnlTensorDescriptor_t
v_desc
,
void
*
v
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
void
*
lr
,
const
void
*
beta1
,
const
void
*
beta2
,
const
void
*
beta1_power
,
const
void
*
beta2_power
,
const
void
*
epsilon
,
const
void
*
grad
,
const
void
*
lr
,
const
void
*
beta1
,
const
void
*
beta2
,
const
void
*
beta1_power
,
const
void
*
beta2_power
,
const
void
*
epsilon
,
const
bool
use_nesterov
);
static
void
ApplyAdaMax
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
cnnlTensorDescriptor_t
var_desc
,
void
*
var
,
const
cnnlTensorDescriptor_t
m_desc
,
void
*
m
,
const
cnnlTensorDescriptor_t
v_desc
,
void
*
v
,
const
void
*
diff
,
const
void
*
lr
,
const
void
*
beta1
,
const
void
*
beta2
,
const
void
*
beta1_power
,
const
cnnlTensorDescriptor_t
var_desc
,
void
*
var
,
const
cnnlTensorDescriptor_t
m_desc
,
void
*
m
,
const
cnnlTensorDescriptor_t
v_desc
,
void
*
v
,
const
void
*
diff
,
const
void
*
lr
,
const
void
*
beta1
,
const
void
*
beta2
,
const
void
*
beta1_power
,
const
void
*
epsilon
);
static
void
ApplyMomentum
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
bool
use_nesterov
,
const
void
*
lr
,
const
void
*
momentum
,
void
*
var
,
const
void
*
grad
,
const
bool
use_nesterov
,
const
void
*
lr
,
const
void
*
momentum
,
void
*
var
,
void
*
accum
);
static
void
ApplyKerasMomentum
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
bool
use_nesterov
,
const
void
*
lr
,
const
void
*
momentum
,
void
*
var
,
void
*
accum
);
const
void
*
grad
,
const
bool
use_nesterov
,
const
void
*
lr
,
const
void
*
momentum
,
void
*
var
,
void
*
accum
);
static
void
ApplyAdadelta
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
diff
,
const
void
*
lr
,
const
void
*
rho
,
const
void
*
epsilon
,
void
*
var
,
void
*
accum
,
const
void
*
diff
,
const
void
*
lr
,
const
void
*
rho
,
const
void
*
epsilon
,
void
*
var
,
void
*
accum
,
void
*
accum_update
);
static
void
SparseSoftmaxXentWithLogits
(
const
ExecutionContext
&
ctx
,
cnnlSoftmaxMode_t
mode
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
label_desc
,
const
void
*
label
,
const
cnnlTensorDescriptor_t
y_desc
,
void
*
output
,
const
cnnlTensorDescriptor_t
diff_y_desc
,
void
*
back_out
);
static
void
RandomUniform
(
const
ExecutionContext
&
ctx
,
const
int
num
,
const
ExecutionContext
&
ctx
,
cnnlSoftmaxMode_t
mode
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
label_desc
,
const
void
*
label
,
const
cnnlTensorDescriptor_t
y_desc
,
void
*
output
,
const
cnnlTensorDescriptor_t
diff_y_desc
,
void
*
back_out
);
static
void
RandomUniform
(
const
ExecutionContext
&
ctx
,
const
int
num
,
const
cnnlDataType_t
data_type
,
const
cnnlRandGenerator_t
mlu_generator
,
void
*
mlu_state
,
void
*
output
);
void
*
mlu_state
,
void
*
output
);
static
void
FusedDropout
(
const
ExecutionContext
&
ctx
,
const
cnnlRandGenerator_t
generator
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
float
p
,
void
*
state
,
const
cnnlTensorDescriptor_t
mask_desc
,
const
void
*
mask
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
FusedDropout
(
const
ExecutionContext
&
ctx
,
const
cnnlRandGenerator_t
generator
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
float
p
,
void
*
state
,
const
cnnlTensorDescriptor_t
mask_desc
,
const
void
*
mask
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Cumsum
(
const
ExecutionContext
&
ctx
,
const
int
axis
,
const
bool
exclusive
,
const
bool
reverse
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
ouput_desc
,
void
*
output
);
static
void
Cumsum
(
const
ExecutionContext
&
ctx
,
const
int
axis
,
const
bool
exclusive
,
const
bool
reverse
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
ouput_desc
,
void
*
output
);
static
void
BroadcastTo
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
...
...
@@ -620,189 +751,267 @@ class MLUCnnl {
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
GatherFunctor
(
const
ExecutionContext
&
ctx
,
const
int
axis
,
const
int
batch_dims
,
const
cnnlTensorDescriptor_t
params_desc
,
const
void
*
params
,
const
cnnlTensorDescriptor_t
indices_desc
,
const
void
*
indices
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
GatherFunctor
(
const
ExecutionContext
&
ctx
,
const
int
axis
,
const
int
batch_dims
,
const
cnnlTensorDescriptor_t
params_desc
,
const
void
*
params
,
const
cnnlTensorDescriptor_t
indices_desc
,
const
void
*
indices
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
ScatterRefFunctor
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
params_desc
,
const
void
*
params
,
const
cnnlTensorDescriptor_t
updates_desc
,
const
void
*
updates
,
const
cnnlTensorDescriptor_t
indices_desc
,
const
void
*
indices
,
const
cnnlScatterRefMode_t
mode
);
static
void
ScatterRefFunctor
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
params_desc
,
const
void
*
params
,
const
cnnlTensorDescriptor_t
updates_desc
,
const
void
*
updates
,
const
cnnlTensorDescriptor_t
indices_desc
,
const
void
*
indices
,
const
cnnlScatterRefMode_t
mode
);
static
void
ScatterFunctor
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
params_desc
,
const
void
*
params
,
void
*
params
,
const
cnnlTensorDescriptor_t
updates_desc
,
const
void
*
updates
,
const
cnnlTensorDescriptor_t
indices_desc
,
const
void
*
indices
,
const
int
dim
,
const
void
*
indices
,
const
int
dim
,
const
cnnlScatterMode_t
mode
=
CNNL_SCATTER
);
static
void
Range
(
const
ExecutionContext
&
ctx
,
const
void
*
start
,
const
void
*
end
,
const
void
*
step
,
const
cnnlDataType_t
output_dtype
,
void
*
output
);
static
void
Range
(
const
ExecutionContext
&
ctx
,
const
void
*
start
,
const
void
*
end
,
const
void
*
step
,
const
cnnlDataType_t
output_dtype
,
void
*
output
);
static
void
Round
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
TopK
(
const
ExecutionContext
&
ctx
,
const
int
k
,
const
int
dim
,
const
bool
largest
,
const
bool
sorted
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
static
void
TopK
(
const
ExecutionContext
&
ctx
,
const
int
k
,
const
int
dim
,
const
bool
largest
,
const
bool
sorted
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
values_output_desc
,
void
*
values_out
,
const
cnnlTensorDescriptor_t
indices_output_desc
,
void
*
indices_out
);
static
void
StridedSlice
(
const
ExecutionContext
&
ctx
,
const
int
begin
[],
const
int
end
[],
const
int
strides
[],
static
void
StridedSlice
(
const
ExecutionContext
&
ctx
,
const
int
begin
[],
const
int
end
[],
const
int
strides
[],
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Split
(
const
ExecutionContext
&
ctx
,
int
split_num
,
int
axis
,
static
void
Split
(
const
ExecutionContext
&
ctx
,
int
split_num
,
int
axis
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input_ptr
,
const
cnnlTensorDescriptor_t
output_descs
[],
void
*
output_ptrs
[]);
static
void
Split
(
const
MLUDeviceContext
&
dev_ctx
,
int
split_num
,
int
axis
,
static
void
Split
(
const
MLUDeviceContext
&
dev_ctx
,
int
split_num
,
int
axis
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input_ptr
,
const
cnnlTensorDescriptor_t
output_descs
[],
void
*
output_ptrs
[]);
static
void
Scale
(
const
ExecutionContext
&
ctx
,
const
int
axis
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
alpha_desc
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
beta_desc
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Scale
(
const
ExecutionContext
&
ctx
,
const
int
axis
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
alpha_desc
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
beta_desc
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
AddN
(
const
ExecutionContext
&
ctx
,
uint32_t
input_num
,
static
void
AddN
(
const
ExecutionContext
&
ctx
,
uint32_t
input_num
,
const
cnnlTensorDescriptor_t
inputs_desc
[],
const
void
*
inputs
[],
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Log
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
cnnlLogBase_t
log_base
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
StridedSliceGrad
(
const
ExecutionContext
&
ctx
,
const
int
begin
[],
const
int
end
[],
const
int
strides
[],
cnnlComputationPreference_t
prefer
,
cnnlLogBase_t
log_base
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
StridedSliceGrad
(
const
ExecutionContext
&
ctx
,
const
int
begin
[],
const
int
end
[],
const
int
strides
[],
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Logic
(
const
ExecutionContext
&
ctx
,
const
cnnlLogicOp_t
log_method
,
static
void
Logic
(
const
ExecutionContext
&
ctx
,
const
cnnlLogicOp_t
log_method
,
const
cnnlTensorDescriptor_t
input1_desc
,
const
void
*
input1
,
const
cnnlTensorDescriptor_t
input2_desc
,
const
void
*
input2
,
const
cnnlTensorDescriptor_t
ouput_desc
,
const
void
*
input2
,
const
cnnlTensorDescriptor_t
ouput_desc
,
void
*
output
);
static
void
Select
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
condition_desc
,
const
void
*
condition_ptr
,
const
cnnlTensorDescriptor_t
then_desc
,
const
void
*
then_ptr
,
const
cnnlTensorDescriptor_t
else_desc
,
const
void
*
else_ptr
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output_ptr
);
static
void
AssignAdd
(
const
ExecutionContext
&
ctx
,
const
void
*
alpha
,
static
void
Select
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
condition_desc
,
const
void
*
condition_ptr
,
const
cnnlTensorDescriptor_t
then_desc
,
const
void
*
then_ptr
,
const
cnnlTensorDescriptor_t
else_desc
,
const
void
*
else_ptr
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output_ptr
);
static
void
AssignAdd
(
const
ExecutionContext
&
ctx
,
const
void
*
alpha
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
update_desc
,
const
void
*
update
,
const
cnnlTensorDescriptor_t
param_desc
,
void
*
param
);
const
cnnlTensorDescriptor_t
param_desc
,
void
*
param
);
static
void
AssignSub
(
const
ExecutionContext
&
ctx
,
const
void
*
alpha
,
static
void
AssignSub
(
const
ExecutionContext
&
ctx
,
const
void
*
alpha
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
update_desc
,
const
void
*
update
,
const
cnnlTensorDescriptor_t
param_desc
,
void
*
param
);
const
cnnlTensorDescriptor_t
param_desc
,
void
*
param
);
static
void
Assign
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
update_desc
,
const
void
*
update
,
const
cnnlTensorDescriptor_t
param_desc
,
void
*
param
);
const
cnnlTensorDescriptor_t
param_desc
,
void
*
param
);
static
void
GatherNd
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
params_desc
,
const
void
*
params
,
const
cnnlTensorDescriptor_t
indices_desc
,
const
void
*
indices
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
BatchToSpace
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
,
const
cnnlSpaceBatchParam_t
param
);
void
*
output
,
const
cnnlSpaceBatchParam_t
param
);
static
void
BatchToSpaceNd
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
cnnlSpaceBatchNdDescriptor_t
param
,
void
*
extra_device_input
,
size_t
extra_input_size
,
void
*
extra_device_input
,
size_t
extra_input_size
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
PoolingForward
(
const
ExecutionContext
&
ctx
,
cnnlPoolingMode_t
pool_mode
,
int64_t
output_h
,
int64_t
output_w
,
cnnlPoolingDescriptor_t
pooling_desc
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
void
*
beta
,
const
void
*
extra_input_ptr
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
AdaptivePoolingForward
(
const
ExecutionContext
&
ctx
,
cnnlPoolingMode_t
pool_mode
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
,
const
cnnlTensorDescriptor_t
index_desc
,
void
*
index
);
static
void
PoolingForward
(
const
ExecutionContext
&
ctx
,
cnnlPoolingMode_t
pool_mode
,
int64_t
output_h
,
int64_t
output_w
,
cnnlPoolingDescriptor_t
pooling_desc
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
void
*
beta
,
const
void
*
extra_input_ptr
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Pool3D
(
const
ExecutionContext
&
ctx
,
cnnlPoolingMode_t
pool_mode
,
static
void
AdaptivePoolingForward
(
const
ExecutionContext
&
ctx
,
cnnlPoolingMode_t
pool_mode
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
,
const
cnnlTensorDescriptor_t
index_desc
,
void
*
index
);
static
void
Pool3D
(
const
ExecutionContext
&
ctx
,
cnnlPoolingMode_t
pool_mode
,
const
std
::
vector
<
int64_t
>&
output_shape
,
cnnlPoolingDescriptor_t
pooling_desc
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
output_desc
,
cnnlPoolingDescriptor_t
pooling_desc
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Pad
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
void
*
paddings
,
const
void
*
padding_value
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Matmul
(
const
ExecutionContext
&
ctx
,
const
bool
transpose_a
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
void
*
paddings
,
const
void
*
padding_value
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Matmul
(
const
ExecutionContext
&
ctx
,
const
bool
transpose_a
,
const
bool
transpose_b
,
const
cnnlTensorDescriptor_t
in0_desc
,
const
void
*
in0
,
const
cnnlTensorDescriptor_t
in1_desc
,
const
void
*
in1
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
in0_desc
,
const
void
*
in0
,
const
cnnlTensorDescriptor_t
in1_desc
,
const
void
*
in1
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
BatchMatmul
(
const
ExecutionContext
&
ctx
,
const
bool
transpose_a
,
const
bool
transpose_b
,
const
cnnlTensorDescriptor_t
in0_desc
,
const
void
*
in0
,
const
cnnlTensorDescriptor_t
in1_desc
,
const
void
*
in1
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
BatchMatmul
(
const
ExecutionContext
&
ctx
,
const
bool
transpose_a
,
const
bool
transpose_b
,
const
cnnlTensorDescriptor_t
in0_desc
,
const
void
*
in0
,
const
cnnlTensorDescriptor_t
in1_desc
,
const
void
*
in1
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
MulAx
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
alpha_desc
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
alpha_desc
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
OpTensor
(
const
ExecutionContext
&
ctx
,
const
cnnlOpTensorDescriptor_t
op_tensor_desc
,
const
cnnlTensorDescriptor_t
a_desc
,
const
void
*
a
,
const
cnnlTensorDescriptor_t
b_desc
,
const
void
*
b
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
,
const
cnnlTensorDescriptor_t
a_desc
,
const
void
*
a
,
const
cnnlTensorDescriptor_t
b_desc
,
const
void
*
b
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
,
const
cnnlDataType_t
dtype
,
const
float
alpha1_float
=
1.
f
,
const
float
alpha2_float
=
1.
f
,
const
float
beta_float
=
0.
f
);
static
void
BiasAddGrad
(
const
ExecutionContext
&
ctx
,
const
int
axis
,
static
void
BiasAddGrad
(
const
ExecutionContext
&
ctx
,
const
int
axis
,
const
cnnlTensorDescriptor_t
out_backprop_desc
,
const
void
*
out_backprop
,
const
cnnlTensorDescriptor_t
output_desc
,
...
...
@@ -810,9 +1019,13 @@ class MLUCnnl {
static
void
OneHot
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
desc_indices
,
const
void
*
indices
,
const
int
depth
,
const
void
*
on_value
,
const
void
*
off_value
,
const
int
axis
,
cnnlDataType_t
output_data_type
,
void
*
output
);
const
void
*
indices
,
const
int
depth
,
const
void
*
on_value
,
const
void
*
off_value
,
const
int
axis
,
cnnlDataType_t
output_data_type
,
void
*
output
);
static
void
NonMaxSuppression
(
const
ExecutionContext
&
ctx
,
const
cnnlNmsDescriptor_t
nms_desc
,
...
...
@@ -821,35 +1034,47 @@ class MLUCnnl {
const
cnnlTensorDescriptor_t
confidence_desc
,
const
void
*
confidence
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
,
void
*
output_size
);
void
*
output
,
void
*
output_size
);
static
void
SoftmaxCrossEntropyWithLogits
(
const
ExecutionContext
&
ctx
,
cnnlSoftmaxMode_t
mode
,
const
ExecutionContext
&
ctx
,
cnnlSoftmaxMode_t
mode
,
cnnlComputationPreference_t
prefer
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
logits_in
,
const
cnnlTensorDescriptor_t
label_desc
,
const
void
*
labels_in
,
const
cnnlTensorDescriptor_t
loss_out_desc
,
void
*
loss_out
,
const
cnnlTensorDescriptor_t
back_out_desc
,
void
*
back_out
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
logits_in
,
const
cnnlTensorDescriptor_t
label_desc
,
const
void
*
labels_in
,
const
cnnlTensorDescriptor_t
loss_out_desc
,
void
*
loss_out
,
const
cnnlTensorDescriptor_t
back_out_desc
,
void
*
back_out
);
static
void
SoftmaxForward
(
const
ExecutionContext
&
ctx
,
cnnlSoftmaxAlgorithm_t
algorithm
,
cnnlSoftmaxMode_t
mode
,
const
void
*
alpha
,
cnnlSoftmaxMode_t
mode
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
void
*
beta
,
const
void
*
input
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
SoftmaxBackward
(
const
ExecutionContext
&
ctx
,
cnnlSoftmaxAlgorithm_t
algorithm
,
cnnlSoftmaxMode_t
mode
,
const
cnnlTensorDescriptor_t
y_desc
,
const
void
*
y
,
const
cnnlTensorDescriptor_t
diff_y_desc
,
const
void
*
diff_y
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
void
*
diff_x
);
static
void
SoftmaxBackward
(
const
ExecutionContext
&
ctx
,
cnnlSoftmaxAlgorithm_t
algorithm
,
cnnlSoftmaxMode_t
mode
,
const
cnnlTensorDescriptor_t
y_desc
,
const
void
*
y
,
const
cnnlTensorDescriptor_t
diff_y_desc
,
const
void
*
diff_y
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
void
*
diff_x
);
static
void
Softplus
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
features_desc
,
const
void
*
features
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
SoftplusGrad
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
gradients_desc
,
...
...
@@ -860,38 +1085,59 @@ class MLUCnnl {
void
*
output
);
static
void
RsqrtGrad
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
data_desc
,
const
void
*
y
,
const
void
*
diff_y
,
void
*
output
);
const
cnnlTensorDescriptor_t
data_desc
,
const
void
*
y
,
const
void
*
diff_y
,
void
*
output
);
static
void
SqrtGrad
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
data_desc
,
const
void
*
y
,
const
void
*
diff_y
,
void
*
output
);
static
void
ConvolutionForward
(
const
ExecutionContext
&
ctx
,
cnnlConvolutionDescriptor_t
conv_desc_
,
const
void
*
alpha
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
bias_desc
,
const
void
*
bias_ptr
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
filtet_desc
,
const
void
*
filter
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
FusedConvBNQuantify
(
const
ExecutionContext
&
ctx
,
cnnlConvolutionDescriptor_t
conv_desc
,
const
void
*
epsilon_ptr
,
const
int
fused_ops_number
,
const
cnnlDataType_t
tensor_dtype
,
const
int
input_position
,
const
float
input_scale
,
const
int
filter_position
,
const
float
filter_scale
,
const
cnnlTensorDescriptor_t
scale_desc
,
const
void
*
scale_ptr
,
const
cnnlTensorDescriptor_t
offset_desc
,
const
void
*
offset_ptr
,
const
cnnlTensorDescriptor_t
mean_desc
,
const
void
*
mean_ptr
,
const
cnnlTensorDescriptor_t
variance_desc
,
const
void
*
variance_ptr
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
filtet_desc
,
const
void
*
filter
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
data_desc
,
const
void
*
y
,
const
void
*
diff_y
,
void
*
output
);
static
void
ConvolutionForward
(
const
ExecutionContext
&
ctx
,
cnnlConvolutionDescriptor_t
conv_desc_
,
const
void
*
alpha
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
bias_desc
,
const
void
*
bias_ptr
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
filtet_desc
,
const
void
*
filter
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
FusedConvBNQuantify
(
const
ExecutionContext
&
ctx
,
cnnlConvolutionDescriptor_t
conv_desc
,
const
void
*
epsilon_ptr
,
const
int
fused_ops_number
,
const
cnnlDataType_t
tensor_dtype
,
const
int
input_position
,
const
float
input_scale
,
const
int
filter_position
,
const
float
filter_scale
,
const
cnnlTensorDescriptor_t
scale_desc
,
const
void
*
scale_ptr
,
const
cnnlTensorDescriptor_t
offset_desc
,
const
void
*
offset_ptr
,
const
cnnlTensorDescriptor_t
mean_desc
,
const
void
*
mean_ptr
,
const
cnnlTensorDescriptor_t
variance_desc
,
const
void
*
variance_ptr
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
filtet_desc
,
const
void
*
filter
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Tile
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
UnsortedSegmentSum
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
data_desc
,
...
...
@@ -901,12 +1147,17 @@ class MLUCnnl {
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Reduce
(
const
ExecutionContext
&
ctx
,
const
bool
need_workspace
,
static
void
Reduce
(
const
ExecutionContext
&
ctx
,
const
bool
need_workspace
,
const
cnnlReduceDescriptor_t
reduction_desc
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
size_t
indices_size
,
void
*
indices
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
size_t
indices_size
,
void
*
indices
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
FloorDiv
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
...
...
@@ -914,34 +1165,41 @@ class MLUCnnl {
const
void
*
input1
,
const
cnnlTensorDescriptor_t
input2_desc
,
const
void
*
input2
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
FloorMod
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input1_desc
,
const
void
*
input1
,
const
cnnlTensorDescriptor_t
input2_desc
,
const
void
*
input2
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Maximum
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input1_desc
,
const
void
*
input1
,
const
cnnlTensorDescriptor_t
input2_desc
,
const
void
*
input2
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Minimum
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input1_desc
,
const
void
*
input1
,
const
cnnlTensorDescriptor_t
input2_desc
,
const
void
*
input2
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
PowR
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
const
cnnlTensorDescriptor_t
input1_desc
,
const
void
*
input1
,
const
cnnlTensorDescriptor_t
input2_desc
,
const
void
*
input2
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input1_desc
,
const
void
*
input1
,
const
cnnlTensorDescriptor_t
input2_desc
,
const
void
*
input2
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
DivNoNan
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
...
...
@@ -949,7 +1207,8 @@ class MLUCnnl {
const
void
*
input1
,
const
cnnlTensorDescriptor_t
input2_desc
,
const
void
*
input2
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
SquaredDifference
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input1_desc
,
...
...
@@ -960,52 +1219,73 @@ class MLUCnnl {
void
*
output
);
static
void
L2Loss
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
void
*
output
);
static
void
Abs
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Neg
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Floor
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Ceil
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
IsNan
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Square
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Sqrt
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Rsqrt
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Cos
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Sin
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
TrigonForward
(
const
ExecutionContext
&
ctx
,
const
cnnlTrigonDescriptor_t
trigon_desc
,
...
...
@@ -1016,31 +1296,41 @@ class MLUCnnl {
static
void
Exp
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Sign
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
IsFinite
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
IsNanInf
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
void
*
output
);
const
void
*
input
,
void
*
output
);
static
void
Erf
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Log1p
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
LogicalNot
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
...
...
@@ -1048,152 +1338,250 @@ class MLUCnnl {
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
DynamicStitch
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
*
indices_desc
,
const
int
**
indices
,
const
cnnlTensorDescriptor_t
*
data_desc
,
const
void
**
data
,
const
int
size
,
int
*
indices_dims
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
CropAndResize
(
const
ExecutionContext
&
ctx
,
const
std
::
string
method_name
,
const
float
extrapolation_value
,
const
cnnlTensorDescriptor_t
image_desc
,
const
void
*
image
,
const
cnnlTensorDescriptor_t
boxes_desc
,
const
void
*
boxes
,
const
cnnlTensorDescriptor_t
box_index_desc
,
const
void
*
box_index
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
DynamicStitch
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
*
indices_desc
,
const
int
**
indices
,
const
cnnlTensorDescriptor_t
*
data_desc
,
const
void
**
data
,
const
int
size
,
int
*
indices_dims
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
CropAndResize
(
const
ExecutionContext
&
ctx
,
const
std
::
string
method_name
,
const
float
extrapolation_value
,
const
cnnlTensorDescriptor_t
image_desc
,
const
void
*
image
,
const
cnnlTensorDescriptor_t
boxes_desc
,
const
void
*
boxes
,
const
cnnlTensorDescriptor_t
box_index_desc
,
const
void
*
box_index
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
CropAndResizeBackwardImage
(
const
ExecutionContext
&
ctx
,
const
std
::
string
method_name
,
const
cnnlTensorDescriptor_t
image_desc
,
const
void
*
image
,
const
cnnlTensorDescriptor_t
boxes_desc
,
const
void
*
boxes
,
const
cnnlTensorDescriptor_t
box_idx_desc
,
const
void
*
box_idx
,
const
cnnlTensorDescriptor_t
grads_image_desc
,
void
*
grads_image
);
const
ExecutionContext
&
ctx
,
const
std
::
string
method_name
,
const
cnnlTensorDescriptor_t
image_desc
,
const
void
*
image
,
const
cnnlTensorDescriptor_t
boxes_desc
,
const
void
*
boxes
,
const
cnnlTensorDescriptor_t
box_idx_desc
,
const
void
*
box_idx
,
const
cnnlTensorDescriptor_t
grads_image_desc
,
void
*
grads_image
);
static
void
CropAndResizeBackwardBoxes
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
image_desc
,
const
void
*
image
,
const
cnnlTensorDescriptor_t
boxes_desc
,
const
void
*
boxes
,
const
cnnlTensorDescriptor_t
box_idx_desc
,
const
void
*
box_idx
,
const
cnnlTensorDescriptor_t
output_desc
,
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
image_desc
,
const
void
*
image
,
const
cnnlTensorDescriptor_t
boxes_desc
,
const
void
*
boxes
,
const
cnnlTensorDescriptor_t
box_idx_desc
,
const
void
*
box_idx
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
PoolingBackward
(
const
ExecutionContext
&
ctx
,
const
cnnlPoolingDescriptor_t
pooling_desc
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
y_desc
,
const
void
*
y
,
const
cnnlTensorDescriptor_t
diff_y_desc
,
const
void
*
diff_y
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
void
*
diff_x
);
static
void
AdaptivePoolingBackward
(
const
ExecutionContext
&
ctx
,
const
cnnlPoolingMode_t
pool_mode
,
const
cnnlTensorDescriptor_t
y_desc
,
const
void
*
y
,
const
cnnlTensorDescriptor_t
index_desc
,
const
void
*
index
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
void
*
diff_x
);
static
void
PoolingBackward
(
const
ExecutionContext
&
ctx
,
const
cnnlPoolingDescriptor_t
pooling_desc
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
y_desc
,
const
void
*
y
,
const
cnnlTensorDescriptor_t
diff_y_desc
,
const
void
*
diff_y
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
void
*
diff_x
);
static
void
AdaptivePoolingBackward
(
const
ExecutionContext
&
ctx
,
const
cnnlPoolingMode_t
pool_mode
,
const
cnnlTensorDescriptor_t
y_desc
,
const
void
*
y
,
const
cnnlTensorDescriptor_t
index_desc
,
const
void
*
index
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
void
*
diff_x
);
static
void
PoolingIndex
(
const
ExecutionContext
&
ctx
,
const
cnnlPoolingDescriptor_t
pooling_desc
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
y_desc
,
void
*
y
);
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
y_desc
,
void
*
y
);
static
void
SpaceToBatch
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
,
const
int64_t
block_shape
[]);
void
*
output
,
const
int64_t
block_shape
[]);
static
void
SpaceToBatchNd
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
cnnlSpaceBatchNdDescriptor_t
param
,
void
*
extra_device_input
,
size_t
extra_input_size
,
void
*
extra_device_input
,
size_t
extra_input_size
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Interp
(
const
ExecutionContext
&
ctx
,
const
cnnlInterpMode_t
mode
,
const
bool
align_corners
,
const
bool
half_pixel_centers
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Interp
(
const
ExecutionContext
&
ctx
,
const
cnnlInterpMode_t
mode
,
const
bool
align_corners
,
const
bool
half_pixel_centers
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
InterpBackward
(
const
ExecutionContext
&
ctx
,
const
cnnlInterpBackwardMode_t
mode
,
const
bool
align_corners
,
const
bool
half_pixel_centers
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
InterpBackward
(
const
ExecutionContext
&
ctx
,
const
cnnlInterpBackwardMode_t
mode
,
const
bool
align_corners
,
const
bool
half_pixel_centers
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
QuantizeParam
(
const
ExecutionContext
&
ctx
,
const
cnnlQuantizeMode_t
mode
,
const
int
bitwidth
,
const
cnnlQuantizeMode_t
mode
,
const
int
bitwidth
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
void
*
position
,
void
*
scale
,
const
void
*
input
,
void
*
position
,
void
*
scale
,
void
*
offset
);
static
void
QuantizeMatMul
(
const
ExecutionContext
&
ctx
,
const
bool
transpose_a
,
const
bool
transpose_b
,
const
cnnlTensorDescriptor_t
a_desc
,
const
void
*
a
,
const
void
*
a_position
,
const
void
*
a_scale
,
const
void
*
a_offset
,
const
cnnlTensorDescriptor_t
b_desc
,
const
void
*
b
,
const
void
*
b_position
,
const
void
*
b_scale
,
const
void
*
b_offset
,
const
cnnlDataType_t
quant_type
,
const
cnnlDataType_t
data_type
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
QuantizeBatchMatMul
(
const
ExecutionContext
&
ctx
,
const
bool
adj_x
,
const
bool
adj_y
,
const
cnnlTensorDescriptor_t
a_desc
,
const
void
*
a
,
const
void
*
a_position
,
const
void
*
a_scale
,
const
void
*
a_offset
,
const
cnnlTensorDescriptor_t
b_desc
,
const
void
*
b
,
const
void
*
b_position
,
const
void
*
b_scale
,
const
void
*
b_offset
,
const
cnnlDataType_t
quant_type
,
const
cnnlDataType_t
data_type
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
QuantizeBatchMatMulBCast
(
const
ExecutionContext
&
ctx
,
const
bool
adj_x
,
const
bool
adj_y
,
const
cnnlTensorDescriptor_t
a_desc
,
const
void
*
a
,
const
void
*
a_position
,
const
void
*
a_scale
,
const
void
*
a_offset
,
const
cnnlTensorDescriptor_t
b_desc
,
const
void
*
b
,
const
void
*
b_position
,
const
void
*
b_scale
,
const
void
*
b_offset
,
const
cnnlDataType_t
quant_type
,
const
cnnlDataType_t
data_type
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
FusedBatchNorm
(
const
ExecutionContext
&
ctx
,
const
bool
is_training
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
scale_desc
,
const
void
*
scale
,
const
void
*
offset
,
const
void
*
estimated_mean
,
const
void
*
estimated_variance
,
float
epsilon
,
float
momentum
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
,
void
*
batch_mean
,
void
*
batch_var
,
void
*
saved_mean
,
void
*
saved_var
);
static
void
FusedBatchNormGrad
(
const
ExecutionContext
&
ctx
,
const
bool
is_training
,
const
cnnlTensorDescriptor_t
y_backprop_desc
,
const
void
*
y_backprop
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
scale_desc
,
const
void
*
scale
,
const
void
*
saved_mean
,
const
void
*
saved_var
,
float
epsilon
,
const
cnnlTensorDescriptor_t
x_backprop_desc
,
void
*
x_backprop
,
void
*
scale_backprop
,
void
*
offset_backprop
);
static
void
LayerNormForward
(
const
ExecutionContext
&
ctx
,
int
axis
,
static
void
QuantizeMatMul
(
const
ExecutionContext
&
ctx
,
const
bool
transpose_a
,
const
bool
transpose_b
,
const
cnnlTensorDescriptor_t
a_desc
,
const
void
*
a
,
const
void
*
a_position
,
const
void
*
a_scale
,
const
void
*
a_offset
,
const
cnnlTensorDescriptor_t
b_desc
,
const
void
*
b
,
const
void
*
b_position
,
const
void
*
b_scale
,
const
void
*
b_offset
,
const
cnnlDataType_t
quant_type
,
const
cnnlDataType_t
data_type
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
QuantizeBatchMatMul
(
const
ExecutionContext
&
ctx
,
const
bool
adj_x
,
const
bool
adj_y
,
const
cnnlTensorDescriptor_t
a_desc
,
const
void
*
a
,
const
void
*
a_position
,
const
void
*
a_scale
,
const
void
*
a_offset
,
const
cnnlTensorDescriptor_t
b_desc
,
const
void
*
b
,
const
void
*
b_position
,
const
void
*
b_scale
,
const
void
*
b_offset
,
const
cnnlDataType_t
quant_type
,
const
cnnlDataType_t
data_type
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
QuantizeBatchMatMulBCast
(
const
ExecutionContext
&
ctx
,
const
bool
adj_x
,
const
bool
adj_y
,
const
cnnlTensorDescriptor_t
a_desc
,
const
void
*
a
,
const
void
*
a_position
,
const
void
*
a_scale
,
const
void
*
a_offset
,
const
cnnlTensorDescriptor_t
b_desc
,
const
void
*
b
,
const
void
*
b_position
,
const
void
*
b_scale
,
const
void
*
b_offset
,
const
cnnlDataType_t
quant_type
,
const
cnnlDataType_t
data_type
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
FusedBatchNorm
(
const
ExecutionContext
&
ctx
,
const
bool
is_training
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
scale_desc
,
const
void
*
scale
,
const
void
*
offset
,
const
void
*
estimated_mean
,
const
void
*
estimated_variance
,
float
epsilon
,
float
momentum
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
,
void
*
batch_mean
,
void
*
batch_var
,
void
*
saved_mean
,
void
*
saved_var
);
static
void
FusedBatchNormGrad
(
const
ExecutionContext
&
ctx
,
const
bool
is_training
,
const
cnnlTensorDescriptor_t
y_backprop_desc
,
const
void
*
y_backprop
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
scale_desc
,
const
void
*
scale
,
const
void
*
saved_mean
,
const
void
*
saved_var
,
float
epsilon
,
const
cnnlTensorDescriptor_t
x_backprop_desc
,
void
*
x_backprop
,
void
*
scale_backprop
,
void
*
offset_backprop
);
static
void
LayerNormForward
(
const
ExecutionContext
&
ctx
,
int
axis
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
weight_bias_desc
,
const
void
*
weight
,
const
void
*
bias
,
float
eps
,
const
cnnlTensorDescriptor_t
y_desc
,
void
*
y
,
const
void
*
weight
,
const
void
*
bias
,
float
eps
,
const
cnnlTensorDescriptor_t
y_desc
,
void
*
y
,
const
cnnlTensorDescriptor_t
mean_rstd_desc
,
void
*
saved_mean
,
void
*
saved_rstd
);
static
void
LayerNormBackward
(
const
ExecutionContext
&
ctx
,
int
axis
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
diff_z_desc
,
const
void
*
diff_z
,
const
cnnlTensorDescriptor_t
weight_bias_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
mean_rstd_desc
,
const
void
*
saved_mean
,
const
void
*
saved_rstd
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
void
*
diff_x
,
void
*
diff_weight
,
void
*
diff_bias
);
void
*
saved_mean
,
void
*
saved_rstd
);
static
void
LayerNormBackward
(
const
ExecutionContext
&
ctx
,
int
axis
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
diff_z_desc
,
const
void
*
diff_z
,
const
cnnlTensorDescriptor_t
weight_bias_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
mean_rstd_desc
,
const
void
*
saved_mean
,
const
void
*
saved_rstd
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
void
*
diff_x
,
void
*
diff_weight
,
void
*
diff_bias
);
static
void
Transpose
(
const
ExecutionContext
&
ctx
,
const
std
::
vector
<
int
>
perm
,
const
int
input_dim
,
const
std
::
vector
<
int
>
perm
,
const
int
input_dim
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
TrilTriu
(
const
ExecutionContext
&
ctx
,
const
int
diagonal_k
,
const
bool
tri_up_mode
,
...
...
@@ -1203,109 +1591,170 @@ class MLUCnnl {
static
void
MatrixBandPart
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
data_desc
,
const
void
*
input
,
const
int
num_lower
,
const
int
num_upper
,
void
*
output
);
const
void
*
input
,
const
int
num_lower
,
const
int
num_upper
,
void
*
output
);
static
void
NumTrue
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
Tensor
index
,
uint32_t
*
num_true
);
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
Tensor
index
,
uint32_t
*
num_true
);
static
void
Where
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
uint32_t
*
strides
,
const
uint32_t
*
index
,
const
cnnlTensorDescriptor_t
y_desc
,
int
*
y
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
uint32_t
*
strides
,
const
uint32_t
*
index
,
const
cnnlTensorDescriptor_t
y_desc
,
int
*
y
,
const
bool
as_tuple
);
static
void
Conv2D
(
const
ExecutionContext
&
ctx
,
const
cnnlConvolutionDescriptor_t
conv_desc
,
const
cnnlDataType_t
tensor_dtype
,
const
cnnlDataType_t
dt_onchip
,
const
void
*
input_position
,
const
void
*
input_scale
,
const
void
*
input_offset
,
const
void
*
filter_position
,
const
void
*
filter_scale
,
const
cnnlDataType_t
dt_onchip
,
const
void
*
input_position
,
const
void
*
input_scale
,
const
void
*
input_offset
,
const
void
*
filter_position
,
const
void
*
filter_scale
,
const
void
*
filter_offset
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
filter_desc
,
const
void
*
filter
,
const
cnnlTensorDescriptor_t
bias_desc
,
const
void
*
bias
,
const
cnnlTensorDescriptor_t
output_desc
,
const
void
*
filter
,
const
cnnlTensorDescriptor_t
bias_desc
,
const
void
*
bias
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
ConvBackpropInput
(
const
ExecutionContext
&
ctx
,
const
cnnlConvolutionDescriptor_t
conv_desc
,
const
cnnlTensorDescriptor_t
filter_desc
,
const
void
*
filter
,
const
cnnlTensorDescriptor_t
out_backprop_desc
,
const
void
*
out_backprop
,
const
cnnlTensorDescriptor_t
in_backprop_desc
,
void
*
in_backprop
);
static
void
ConvBackpropInput
(
const
ExecutionContext
&
ctx
,
const
cnnlConvolutionDescriptor_t
conv_desc
,
const
cnnlTensorDescriptor_t
filter_desc
,
const
void
*
filter
,
const
cnnlTensorDescriptor_t
out_backprop_desc
,
const
void
*
out_backprop
,
const
cnnlTensorDescriptor_t
in_backprop_desc
,
void
*
in_backprop
);
static
void
QuantizeConvBackpropInput
(
const
ExecutionContext
&
ctx
,
const
cnnlConvolutionDescriptor_t
conv_desc
,
const
cnnlDataType_t
tensor_dtype
,
const
cnnlDataType_t
dt_onchip
,
const
void
*
filter_position
,
const
void
*
filter_scale
,
const
void
*
filter_offset
,
const
void
*
out_backprop_position
,
const
void
*
out_backprop_scale
,
const
void
*
out_backprop_offset
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
filter
,
const
cnnlTensorDescriptor_t
out_backprop_desc
,
const
void
*
out_backprop
,
const
cnnlTensorDescriptor_t
in_backprop_desc
,
void
*
in_backprop
);
const
ExecutionContext
&
ctx
,
const
cnnlConvolutionDescriptor_t
conv_desc
,
const
cnnlDataType_t
tensor_dtype
,
const
cnnlDataType_t
dt_onchip
,
const
void
*
filter_position
,
const
void
*
filter_scale
,
const
void
*
filter_offset
,
const
void
*
out_backprop_position
,
const
void
*
out_backprop_scale
,
const
void
*
out_backprop_offset
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
filter
,
const
cnnlTensorDescriptor_t
out_backprop_desc
,
const
void
*
out_backprop
,
const
cnnlTensorDescriptor_t
in_backprop_desc
,
void
*
in_backprop
);
static
void
ConvBackpropFilter
(
const
ExecutionContext
&
ctx
,
const
cnnlConvolutionDescriptor_t
conv_desc
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
out_backprop_desc
,
const
void
*
out_backprop
,
const
cnnlTensorDescriptor_t
filter_backprop_desc
,
void
*
filter_backprop
);
const
ExecutionContext
&
ctx
,
const
cnnlConvolutionDescriptor_t
conv_desc
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
out_backprop_desc
,
const
void
*
out_backprop
,
const
cnnlTensorDescriptor_t
filter_backprop_desc
,
void
*
filter_backprop
);
static
void
QuantizeConvBackpropFilter
(
const
ExecutionContext
&
ctx
,
const
cnnlConvolutionDescriptor_t
conv_desc
,
const
cnnlDataType_t
tensor_dtype
,
const
cnnlDataType_t
dt_onchip
,
const
void
*
input_position
,
const
void
*
input_scale
,
const
void
*
input_offset
,
const
void
*
out_backprop_position
,
const
void
*
out_backprop_scale
,
const
void
*
out_backprop_offset
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
out_backprop_desc
,
const
void
*
out_backprop
,
const
cnnlTensorDescriptor_t
filter_backprop_desc
,
void
*
filter_backprop
);
static
void
DCNForward
(
const
ExecutionContext
&
ctx
,
const
cnnlDCNDescriptor_t
dcn_desc
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
offset_desc
,
const
void
*
offset
,
const
cnnlTensorDescriptor_t
mask_desc
,
const
void
*
mask
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
bias_desc
,
const
void
*
bias
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
DCNBackwardData
(
const
ExecutionContext
&
ctx
,
const
cnnlDCNDescriptor_t
dcn_desc
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
offset_desc
,
const
void
*
offset
,
const
cnnlTensorDescriptor_t
mask_desc
,
const
void
*
mask
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
grad_output_desc
,
const
void
*
grad_output
,
const
cnnlTensorDescriptor_t
grad_input_desc
,
void
*
grad_input
,
const
cnnlTensorDescriptor_t
grad_offset_desc
,
void
*
grad_offset
,
const
cnnlTensorDescriptor_t
grad_mask_desc
,
void
*
grad_mask
);
static
void
DCNBackwardWeight
(
const
ExecutionContext
&
ctx
,
const
cnnlDCNDescriptor_t
dcn_desc
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
offset_desc
,
const
void
*
offset
,
const
cnnlTensorDescriptor_t
mask_desc
,
const
void
*
mask
,
const
cnnlTensorDescriptor_t
grad_output_desc
,
const
void
*
grad_output
,
const
cnnlTensorDescriptor_t
grad_weight_desc
,
void
*
grad_weight
,
const
cnnlTensorDescriptor_t
grad_bias_desc
,
void
*
grad_bias
);
const
ExecutionContext
&
ctx
,
const
cnnlConvolutionDescriptor_t
conv_desc
,
const
cnnlDataType_t
tensor_dtype
,
const
cnnlDataType_t
dt_onchip
,
const
void
*
input_position
,
const
void
*
input_scale
,
const
void
*
input_offset
,
const
void
*
out_backprop_position
,
const
void
*
out_backprop_scale
,
const
void
*
out_backprop_offset
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
out_backprop_desc
,
const
void
*
out_backprop
,
const
cnnlTensorDescriptor_t
filter_backprop_desc
,
void
*
filter_backprop
);
static
void
DCNForward
(
const
ExecutionContext
&
ctx
,
const
cnnlDCNDescriptor_t
dcn_desc
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
offset_desc
,
const
void
*
offset
,
const
cnnlTensorDescriptor_t
mask_desc
,
const
void
*
mask
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
bias_desc
,
const
void
*
bias
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
DCNBackwardData
(
const
ExecutionContext
&
ctx
,
const
cnnlDCNDescriptor_t
dcn_desc
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
offset_desc
,
const
void
*
offset
,
const
cnnlTensorDescriptor_t
mask_desc
,
const
void
*
mask
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
grad_output_desc
,
const
void
*
grad_output
,
const
cnnlTensorDescriptor_t
grad_input_desc
,
void
*
grad_input
,
const
cnnlTensorDescriptor_t
grad_offset_desc
,
void
*
grad_offset
,
const
cnnlTensorDescriptor_t
grad_mask_desc
,
void
*
grad_mask
);
static
void
DCNBackwardWeight
(
const
ExecutionContext
&
ctx
,
const
cnnlDCNDescriptor_t
dcn_desc
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
offset_desc
,
const
void
*
offset
,
const
cnnlTensorDescriptor_t
mask_desc
,
const
void
*
mask
,
const
cnnlTensorDescriptor_t
grad_output_desc
,
const
void
*
grad_output
,
const
cnnlTensorDescriptor_t
grad_weight_desc
,
void
*
grad_weight
,
const
cnnlTensorDescriptor_t
grad_bias_desc
,
void
*
grad_bias
);
static
void
InTopK
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
predictions_desc
,
const
void
*
predictions
,
const
cnnlTensorDescriptor_t
targets_desc
,
const
void
*
targets
,
const
cnnlTensorDescriptor_t
k_desc
,
const
void
*
k
,
const
int
k_int
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
void
*
targets
,
const
cnnlTensorDescriptor_t
k_desc
,
const
void
*
k
,
const
int
k_int
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
ScatterNd
(
const
ExecutionContext
&
ctx
,
cnnlScatterNdMode_t
mode
,
static
void
ScatterNd
(
const
ExecutionContext
&
ctx
,
cnnlScatterNdMode_t
mode
,
const
cnnlTensorDescriptor_t
indices_desc
,
const
void
*
indices
,
const
cnnlTensorDescriptor_t
updates_desc
,
const
void
*
updates
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
BitWise
(
const
ExecutionContext
&
ctx
,
const
cnnlBitComputeOp_t
optype
,
...
...
@@ -1313,12 +1762,17 @@ class MLUCnnl {
const
void
*
input1
,
const
cnnlTensorDescriptor_t
input2_desc
,
const
void
*
input2
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
QR
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
a_desc
,
const
void
*
a
,
const
cnnlTensorDescriptor_t
q_desc
,
void
*
q
,
const
cnnlTensorDescriptor_t
r_desc
,
void
*
r
,
const
bool
some
);
const
cnnlTensorDescriptor_t
a_desc
,
const
void
*
a
,
const
cnnlTensorDescriptor_t
q_desc
,
void
*
q
,
const
cnnlTensorDescriptor_t
r_desc
,
void
*
r
,
const
bool
some
);
static
void
Reciprocal
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
...
...
@@ -1326,55 +1780,85 @@ class MLUCnnl {
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
BceLoss
(
const
ExecutionContext
&
ctx
,
const
cnnlBceLossReduction_t
reduction
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
target_desc
,
const
void
*
target
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
BceLossBackward
(
const
ExecutionContext
&
ctx
,
const
cnnlBceLossReduction_t
reduction
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
target_desc
,
const
void
*
target
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
EmbeddingForward
(
const
ExecutionContext
&
ctx
,
const
int
padding_idx
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
indices_desc
,
const
int
*
indices
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Transform
(
const
ExecutionContext
&
ctx
,
const
void
*
alpha
,
static
void
BceLoss
(
const
ExecutionContext
&
ctx
,
const
cnnlBceLossReduction_t
reduction
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
target_desc
,
const
void
*
target
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
BceLossBackward
(
const
ExecutionContext
&
ctx
,
const
cnnlBceLossReduction_t
reduction
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
target_desc
,
const
void
*
target
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
EmbeddingForward
(
const
ExecutionContext
&
ctx
,
const
int
padding_idx
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
indices_desc
,
const
int
*
indices
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Transform
(
const
ExecutionContext
&
ctx
,
const
void
*
alpha
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
EmbeddingBackward
(
const
ExecutionContext
&
ctx
,
int
padding_idx
,
bool
scale_grad_by_freq
,
const
cnnlTensorDescriptor_t
indices_desc
,
const
void
*
indices
,
const
cnnlTensorDescriptor_t
diff_desc
,
const
void
*
diff
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
BceWithLogits
(
const
ExecutionContext
&
ctx
,
cnnlBceWithLogitsReduction_t
reduction
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
target_desc
,
const
void
*
target
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
pos_weight_desc
,
const
void
*
pos_weight
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
EmbeddingBackward
(
const
ExecutionContext
&
ctx
,
int
padding_idx
,
bool
scale_grad_by_freq
,
const
cnnlTensorDescriptor_t
indices_desc
,
const
void
*
indices
,
const
cnnlTensorDescriptor_t
diff_desc
,
const
void
*
diff
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
BceWithLogits
(
const
ExecutionContext
&
ctx
,
cnnlBceWithLogitsReduction_t
reduction
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
target_desc
,
const
void
*
target
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
pos_weight_desc
,
const
void
*
pos_weight
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
BceWithLogitsBackward
(
const
ExecutionContext
&
ctx
,
cnnlBceWithLogitsReduction_t
reduction
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
target_desc
,
const
void
*
target
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
pos_weight_desc
,
const
void
*
pos_weight
,
const
cnnlTensorDescriptor_t
diff_input_desc
,
void
*
diff_input
);
const
ExecutionContext
&
ctx
,
cnnlBceWithLogitsReduction_t
reduction
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
target_desc
,
const
void
*
target
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
pos_weight_desc
,
const
void
*
pos_weight
,
const
cnnlTensorDescriptor_t
diff_input_desc
,
void
*
diff_input
);
};
template
<
typename
T
>
...
...
@@ -1393,22 +1877,27 @@ inline void TransposeFromMLUTensor(const ExecutionContext& ctx,
transformed_output
->
mutable_data
<
T
>
(
framework
::
DDim
(
output_shape
.
data
(),
dim_size
),
ctx
.
GetPlace
());
}
MLUCnnlTensorDesc
trans_in_desc
(
*
transformed_input
,
CNNL_LAYOUT_ARRAY
,
ToCnnlDataType
<
T
>
());
MLUCnnlTensorDesc
trans_out_desc
(
*
transformed_output
,
CNNL_LAYOUT_ARRAY
,
ToCnnlDataType
<
T
>
());
MLUCnnl
::
Transpose
(
ctx
,
perm
,
dim_size
,
trans_in_desc
.
get
(),
GetBasePtr
(
transformed_input
),
trans_out_desc
.
get
(),
MLUCnnlTensorDesc
trans_in_desc
(
*
transformed_input
,
CNNL_LAYOUT_ARRAY
,
ToCnnlDataType
<
T
>
());
MLUCnnlTensorDesc
trans_out_desc
(
*
transformed_output
,
CNNL_LAYOUT_ARRAY
,
ToCnnlDataType
<
T
>
());
MLUCnnl
::
Transpose
(
ctx
,
perm
,
dim_size
,
trans_in_desc
.
get
(),
GetBasePtr
(
transformed_input
),
trans_out_desc
.
get
(),
GetBasePtr
(
transformed_output
));
}
template
<
typename
T
>
inline
void
FillMLUTensorWithHostValue
(
const
ExecutionContext
&
ctx
,
T
value
,
inline
void
FillMLUTensorWithHostValue
(
const
ExecutionContext
&
ctx
,
T
value
,
Tensor
*
out
)
{
MLUCnnlTensorDesc
out_desc
(
*
out
);
MLUCnnl
::
Fill
(
ctx
,
CNNL_POINTER_MODE_HOST
,
&
value
,
out_desc
.
get
(),
GetBasePtr
(
out
));
MLUCnnl
::
Fill
(
ctx
,
CNNL_POINTER_MODE_HOST
,
&
value
,
out_desc
.
get
(),
GetBasePtr
(
out
));
}
}
// namespace operators
...
...
paddle/fluid/operators/scatter_op_mlu.cc
浏览文件 @
0d17c047
...
...
@@ -33,26 +33,43 @@ class ScatterMLUKernel : public framework::OpKernel<T> {
cnnlScatterRefMode_t
mode
;
if
(
overwrite
)
{
mode
=
CNNL_SCATTERREF_UPDATE
;
MLUCnnl
::
ScatterFunctor
(
ctx
,
x_desc
.
get
(),
GetBasePtr
(
x
),
updates_desc
.
get
(),
GetBasePtr
(
updates
),
indices_desc
.
get
(),
GetBasePtr
(
indices
),
mode
);
MLUCnnl
::
ScatterRefFunctor
(
ctx
,
x_desc
.
get
(),
GetBasePtr
(
x
),
updates_desc
.
get
(),
GetBasePtr
(
updates
),
indices_desc
.
get
(),
GetBasePtr
(
indices
),
mode
);
}
else
{
Tensor
tensor_zeros
(
updates
->
type
());
tensor_zeros
.
mutable_data
<
T
>
(
updates
->
dims
(),
ctx
.
GetPlace
());
MLUCnnlTensorDesc
tensor_zeros_desc
(
tensor_zeros
);
float
value
=
0.0
;
auto
value_t
=
static_cast
<
T
>
(
value
);
MLUCnnl
::
Fill
(
ctx
,
CNNL_POINTER_MODE_HOST
,
&
value_t
,
tensor_zeros_desc
.
get
(),
GetBasePtr
(
&
tensor_zeros
));
MLUCnnl
::
Fill
(
ctx
,
CNNL_POINTER_MODE_HOST
,
&
value_t
,
tensor_zeros_desc
.
get
(),
GetBasePtr
(
&
tensor_zeros
));
mode
=
CNNL_SCATTERREF_UPDATE
;
MLUCnnl
::
ScatterFunctor
(
ctx
,
x_desc
.
get
(),
GetBasePtr
(
x
),
tensor_zeros_desc
.
get
(),
GetBasePtr
(
&
tensor_zeros
),
indices_desc
.
get
(),
GetBasePtr
(
indices
),
mode
);
MLUCnnl
::
ScatterRefFunctor
(
ctx
,
x_desc
.
get
(),
GetBasePtr
(
x
),
tensor_zeros_desc
.
get
(),
GetBasePtr
(
&
tensor_zeros
),
indices_desc
.
get
(),
GetBasePtr
(
indices
),
mode
);
mode
=
CNNL_SCATTERREF_ADD
;
MLUCnnl
::
ScatterFunctor
(
ctx
,
x_desc
.
get
(),
GetBasePtr
(
x
),
updates_desc
.
get
(),
GetBasePtr
(
updates
),
indices_desc
.
get
(),
GetBasePtr
(
indices
),
mode
);
MLUCnnl
::
ScatterRefFunctor
(
ctx
,
x_desc
.
get
(),
GetBasePtr
(
x
),
updates_desc
.
get
(),
GetBasePtr
(
updates
),
indices_desc
.
get
(),
GetBasePtr
(
indices
),
mode
);
}
paddle
::
framework
::
TensorCopy
(
*
x
,
place
,
out
);
}
...
...
@@ -62,5 +79,6 @@ class ScatterMLUKernel : public framework::OpKernel<T> {
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_MLU_KERNEL
(
scatter
,
ops
::
ScatterMLUKernel
<
float
>
,
REGISTER_OP_MLU_KERNEL
(
scatter
,
ops
::
ScatterMLUKernel
<
float
>
,
ops
::
ScatterMLUKernel
<
paddle
::
platform
::
float16
>
);
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录