Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
0d17c047
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
0d17c047
编写于
6月 24, 2022
作者:
Z
zhaoying9105
提交者:
GitHub
6月 24, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[MLU](bugfix): fix MLUCnnl::ScatterFunctor function declare bug (#43778)
上级
03972d5a
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
1061 addition
and
554 deletion
+1061
-554
paddle/fluid/operators/mlu/mlu_baseop.h
paddle/fluid/operators/mlu/mlu_baseop.h
+1030
-541
paddle/fluid/operators/scatter_op_mlu.cc
paddle/fluid/operators/scatter_op_mlu.cc
+31
-13
未找到文件。
paddle/fluid/operators/mlu/mlu_baseop.h
浏览文件 @
0d17c047
...
...
@@ -35,9 +35,12 @@ using DeviceContextPool = platform::DeviceContextPool;
using
MLUDeviceContext
=
platform
::
MLUDeviceContext
;
const
std
::
map
<
std
::
string
,
cnnlReduceOp_t
>
MLUReduceOpMap
=
{
{
"reduce_all"
,
CNNL_REDUCE_AND
},
{
"reduce_any"
,
CNNL_REDUCE_OR
},
{
"reduce_max"
,
CNNL_REDUCE_MAX
},
{
"reduce_mean"
,
CNNL_REDUCE_AVG
},
{
"reduce_min"
,
CNNL_REDUCE_MIN
},
{
"reduce_sum"
,
CNNL_REDUCE_ADD
},
{
"reduce_all"
,
CNNL_REDUCE_AND
},
{
"reduce_any"
,
CNNL_REDUCE_OR
},
{
"reduce_max"
,
CNNL_REDUCE_MAX
},
{
"reduce_mean"
,
CNNL_REDUCE_AVG
},
{
"reduce_min"
,
CNNL_REDUCE_MIN
},
{
"reduce_sum"
,
CNNL_REDUCE_ADD
},
{
"reduce_prod"
,
CNNL_REDUCE_MUL
},
};
...
...
@@ -225,36 +228,49 @@ class MLUCnnlTensorDesc {
MLUCnnlTensorDesc
&
operator
=
(
MLUCnnlTensorDesc
&&
rhs
);
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int
dim_sizes
[],
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int
dim_sizes
[],
const
cnnlDataType_t
tensor_dtype
);
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int
dim_sizes
[],
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int
dim_sizes
[],
const
cnnlDataType_t
tensor_dtype
,
const
cnnlTensorLayout_t
layout
);
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int
dim_sizes
[],
const
cnnlDataType_t
tensor_dtype
,
int
position
);
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int
dim_sizes
[],
const
cnnlDataType_t
tensor_dtype
,
int
position
);
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int64_t
dim_sizes
[],
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int64_t
dim_sizes
[],
const
cnnlDataType_t
tensor_dtype
);
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int64_t
dim_sizes
[],
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int64_t
dim_sizes
[],
const
cnnlDataType_t
tensor_dtype
,
const
cnnlTensorLayout_t
layout
);
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int64_t
dim_sizes
[],
const
cnnlDataType_t
tensor_dtype
,
int
position
);
MLUCnnlTensorDesc
(
const
int
tensor_dim
,
const
int64_t
dim_sizes
[],
const
cnnlDataType_t
tensor_dtype
,
int
position
);
MLUCnnlTensorDesc
(
const
Tensor
&
tensor
,
const
cnnlTensorLayout_t
layout
,
MLUCnnlTensorDesc
(
const
Tensor
&
tensor
,
const
cnnlTensorLayout_t
layout
,
const
cnnlDataType_t
tensor_dtype
);
explicit
MLUCnnlTensorDesc
(
const
Tensor
&
tensor
);
MLUCnnlTensorDesc
(
const
Tensor
&
tensor
,
cnnlTensorLayout_t
layout
,
const
cnnlDataType_t
tensor_dtype
,
int
position
);
MLUCnnlTensorDesc
(
const
Tensor
&
tensor
,
cnnlTensorLayout_t
layout
,
const
cnnlDataType_t
tensor_dtype
,
int
position
);
MLUCnnlTensorDesc
(
const
Tensor
&
tensor
,
cnnlTensorLayout_t
layout
,
const
cnnlDataType_t
tensor_dtype
,
int
position
,
MLUCnnlTensorDesc
(
const
Tensor
&
tensor
,
cnnlTensorLayout_t
layout
,
const
cnnlDataType_t
tensor_dtype
,
int
position
,
float
scale
);
~
MLUCnnlTensorDesc
();
...
...
@@ -270,8 +286,10 @@ class MLUCnnlActivationDesc {
MLUCnnlActivationDesc
(
const
MLUCnnlActivationDesc
&
desc
)
=
delete
;
MLUCnnlActivationDesc
&
operator
=
(
const
MLUCnnlActivationDesc
&
desc
)
=
delete
;
MLUCnnlActivationDesc
(
const
cnnlActivationMode_t
act_mode
,
const
float
ceof
);
MLUCnnlActivationDesc
(
const
cnnlActivationMode_t
act_mode
,
const
float
ceof
,
const
float
sliced_dim
,
const
float
selu_alpha
,
MLUCnnlActivationDesc
(
const
cnnlActivationMode_t
act_mode
,
const
float
ceof
,
const
float
sliced_dim
,
const
float
selu_alpha
,
const
float
selu_lambda
);
const
cnnlActivationDescriptor_t
get
()
const
;
...
...
@@ -288,14 +306,22 @@ class MLUCnnlPoolingDesc {
MLUCnnlPoolingDesc
(
const
cnnlPoolingMode_t
mode
,
const
cnnlNanPropagation_t
maxpooling_nan_opt
,
int
window_rows
,
int
window_cols
,
int64_t
pad_up
,
int64_t
pad_down
,
int64_t
pad_left
,
int64_t
pad_right
,
int
row_stride
,
int
col_stride
,
int
row_dilation
,
int
col_dilation
,
bool
ceil_mode
);
int
window_rows
,
int
window_cols
,
int64_t
pad_up
,
int64_t
pad_down
,
int64_t
pad_left
,
int64_t
pad_right
,
int
row_stride
,
int
col_stride
,
int
row_dilation
,
int
col_dilation
,
bool
ceil_mode
);
MLUCnnlPoolingDesc
(
const
cnnlPoolingMode_t
mode
,
const
cnnlNanPropagation_t
maxpooling_nan_opt
,
const
int
tensor_rank
,
const
std
::
vector
<
int
>&
window
,
const
int
tensor_rank
,
const
std
::
vector
<
int
>&
window
,
const
std
::
vector
<
int
>&
padding
,
const
std
::
vector
<
int
>&
stride
);
...
...
@@ -364,8 +390,10 @@ class MLUCnnlNMSDesc {
MLUCnnlNMSDesc
(
const
MLUCnnlNMSDesc
&
desc
)
=
delete
;
MLUCnnlNMSDesc
&
operator
=
(
const
MLUCnnlNMSDesc
&
desc
)
=
delete
;
MLUCnnlNMSDesc
(
const
cnnlNmsOutputMode_t
mode
,
const
float
iou_threshold
,
const
int
max_output_size
,
const
float
confidence_threshold
,
MLUCnnlNMSDesc
(
const
cnnlNmsOutputMode_t
mode
,
const
float
iou_threshold
,
const
int
max_output_size
,
const
float
confidence_threshold
,
const
int
input_layout
);
const
cnnlNmsDescriptor_t
get
()
const
;
...
...
@@ -378,12 +406,17 @@ class MLUCnnlNMSDesc {
class
MLUCnnlConvolutionDesc
{
public:
MLUCnnlConvolutionDesc
(
const
int
dims
,
const
int
pad
[],
const
int
stride
[],
const
int
dilation
[],
const
int
group_count
,
MLUCnnlConvolutionDesc
(
const
int
dims
,
const
int
pad
[],
const
int
stride
[],
const
int
dilation
[],
const
int
group_count
,
const
cnnlDataType_t
tensor_dtype
);
MLUCnnlConvolutionDesc
(
const
int
dims
,
const
int64_t
pad
[],
const
int64_t
stride
[],
const
int64_t
dilation
[],
MLUCnnlConvolutionDesc
(
const
int
dims
,
const
int64_t
pad
[],
const
int64_t
stride
[],
const
int64_t
dilation
[],
const
int
group_count
,
const
cnnlDataType_t
tensor_dtype
);
...
...
@@ -402,7 +435,8 @@ class MLUCnnlConvolutionDesc {
class
MLUCnnlBatchSpaceDesc
{
public:
MLUCnnlBatchSpaceDesc
(
uint32_t
block_shape
[],
uint32_t
paddings
[],
MLUCnnlBatchSpaceDesc
(
uint32_t
block_shape
[],
uint32_t
paddings
[],
const
uint32_t
block_shape_size
,
const
uint32_t
paddings_size
);
...
...
@@ -446,8 +480,12 @@ class MLUCnnlTrigonDesc {
class
MLUCnnlDCNDesc
{
public:
MLUCnnlDCNDesc
(
int
dimNb
,
const
int
*
pad
,
const
int
*
stride
,
const
int
*
dilation
,
int
deformable_group
,
int
conv_group
,
MLUCnnlDCNDesc
(
int
dimNb
,
const
int
*
pad
,
const
int
*
stride
,
const
int
*
dilation
,
int
deformable_group
,
int
conv_group
,
int
im2col_step
);
const
cnnlDCNDescriptor_t
get
()
const
;
...
...
@@ -461,55 +499,88 @@ class MLUCnnl {
public:
static
void
Active
(
const
ExecutionContext
&
ctx
,
cnnlActivationDescriptor_t
active_desc
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
ActiveGrad
(
const
ExecutionContext
&
ctx
,
cnnlActivationDescriptor_t
active_desc
,
const
void
*
alpha
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
y_desc
,
const
void
*
y
,
const
cnnlTensorDescriptor_t
diff_y_desc
,
const
void
*
diff_y
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
void
*
diff_x
);
static
void
ActiveGrad
(
const
ExecutionContext
&
ctx
,
cnnlActivationDescriptor_t
active_desc
,
const
void
*
alpha
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
y_desc
,
const
void
*
y
,
const
cnnlTensorDescriptor_t
diff_y_desc
,
const
void
*
diff_y
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
void
*
diff_x
);
static
void
Concat
(
const
ExecutionContext
&
ctx
,
const
int
pack_num
,
const
int
axis
,
const
cnnlTensorDescriptor_t
inputs_desc
[],
static
void
Concat
(
const
ExecutionContext
&
ctx
,
const
int
pack_num
,
const
int
axis
,
const
cnnlTensorDescriptor_t
inputs_desc
[],
const
void
*
const
inputs
[],
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Concat
(
const
MLUDeviceContext
&
dev_ctx
,
const
int
pack_num
,
const
int
axis
,
const
cnnlTensorDescriptor_t
inputs_desc
[],
static
void
Concat
(
const
MLUDeviceContext
&
dev_ctx
,
const
int
pack_num
,
const
int
axis
,
const
cnnlTensorDescriptor_t
inputs_desc
[],
const
void
*
const
inputs
[],
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Cast
(
const
ExecutionContext
&
ctx
,
cnnlCastDataType_t
cast_type
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Cast
(
const
ExecutionContext
&
ctx
,
cnnlCastDataType_t
cast_type
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Clip
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
void
*
min
,
const
void
*
max
,
void
*
y
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
void
*
min
,
const
void
*
max
,
void
*
y
);
static
void
HardtanhBackward
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
diff_y_desc
,
const
void
*
diff_y
,
const
float
max_val
,
const
float
min_val
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
void
*
diff_x
);
static
void
HardtanhBackward
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
diff_y_desc
,
const
void
*
diff_y
,
const
float
max_val
,
const
float
min_val
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
void
*
diff_x
);
static
void
Div
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
const
cnnlTensorDescriptor_t
in0_desc
,
const
void
*
in0
,
const
cnnlTensorDescriptor_t
in1_desc
,
const
void
*
in1
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
in0_desc
,
const
void
*
in0
,
const
cnnlTensorDescriptor_t
in1_desc
,
const
void
*
in1
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Fill
(
const
ExecutionContext
&
ctx
,
const
cnnlPointerMode_t
pointer_mode
,
const
void
*
value_ptr
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlPointerMode_t
pointer_mode
,
const
void
*
value_ptr
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
LRN
(
const
ExecutionContext
&
ctx
,
const
int
local_size
,
const
double
alpha
,
const
double
beta
,
const
double
k
,
static
void
LRN
(
const
ExecutionContext
&
ctx
,
const
int
local_size
,
const
double
alpha
,
const
double
beta
,
const
double
k
,
const
cnnlTensorDescriptor_t
input_quant_desc
,
const
void
*
input_quant
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
QuantifyOffline
(
const
ExecutionContext
&
context
,
cnnlQuantizeMode_t
mode
,
...
...
@@ -521,98 +592,158 @@ class MLUCnnl {
static
void
QuantifyOnline
(
const
ExecutionContext
&
context
,
const
int
bitwidth
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
bool
compute_scale
,
void
*
position
,
void
*
scale
,
const
void
*
input
,
const
bool
compute_scale
,
void
*
position
,
void
*
scale
,
const
cnnlTensorDescriptor_t
ouput_desc
,
void
*
output
);
static
void
SGD
(
const
ExecutionContext
&
context
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
void
*
lr
,
const
cnnlTensorDescriptor_t
var_desc
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
void
*
lr
,
const
cnnlTensorDescriptor_t
var_desc
,
void
*
var
);
static
void
ApplyAdaGrad
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
cnnlTensorDescriptor_t
accum_desc
,
void
*
accum
,
const
cnnlTensorDescriptor_t
var_desc
,
void
*
var
,
const
void
*
lr
,
const
bool
update_slots
);
const
cnnlTensorDescriptor_t
accum_desc
,
void
*
accum
,
const
cnnlTensorDescriptor_t
var_desc
,
void
*
var
,
const
void
*
lr
,
const
bool
update_slots
);
static
void
ApplyRMSProp
(
const
ExecutionContext
&
context
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
void
*
lr
,
const
void
*
rho
,
const
void
*
momentum
,
const
void
*
epsilon
,
const
cnnlTensorDescriptor_t
var_desc
,
void
*
var
,
const
cnnlTensorDescriptor_t
ms_desc
,
void
*
ms
,
const
cnnlTensorDescriptor_t
mom_desc
,
void
*
mom
);
static
void
ApplyCenterRMSProp
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
void
*
lr
,
const
void
*
rho
,
const
void
*
momentum
,
const
void
*
epsilon
,
const
cnnlTensorDescriptor_t
var_desc
,
void
*
var
,
const
cnnlTensorDescriptor_t
mg_desc
,
void
*
mg
,
const
cnnlTensorDescriptor_t
ms_desc
,
void
*
ms
,
const
cnnlTensorDescriptor_t
mom_desc
,
void
*
mom
);
const
void
*
grad
,
const
void
*
lr
,
const
void
*
rho
,
const
void
*
momentum
,
const
void
*
epsilon
,
const
cnnlTensorDescriptor_t
var_desc
,
void
*
var
,
const
cnnlTensorDescriptor_t
ms_desc
,
void
*
ms
,
const
cnnlTensorDescriptor_t
mom_desc
,
void
*
mom
);
static
void
ApplyCenterRMSProp
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
void
*
lr
,
const
void
*
rho
,
const
void
*
momentum
,
const
void
*
epsilon
,
const
cnnlTensorDescriptor_t
var_desc
,
void
*
var
,
const
cnnlTensorDescriptor_t
mg_desc
,
void
*
mg
,
const
cnnlTensorDescriptor_t
ms_desc
,
void
*
ms
,
const
cnnlTensorDescriptor_t
mom_desc
,
void
*
mom
);
static
void
ApplyAdam
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
var_desc
,
void
*
var
,
const
cnnlTensorDescriptor_t
m_desc
,
void
*
m
,
const
cnnlTensorDescriptor_t
v_desc
,
void
*
v
,
const
cnnlTensorDescriptor_t
var_desc
,
void
*
var
,
const
cnnlTensorDescriptor_t
m_desc
,
void
*
m
,
const
cnnlTensorDescriptor_t
v_desc
,
void
*
v
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
void
*
lr
,
const
void
*
beta1
,
const
void
*
beta2
,
const
void
*
beta1_power
,
const
void
*
beta2_power
,
const
void
*
epsilon
,
const
void
*
grad
,
const
void
*
lr
,
const
void
*
beta1
,
const
void
*
beta2
,
const
void
*
beta1_power
,
const
void
*
beta2_power
,
const
void
*
epsilon
,
const
bool
use_nesterov
);
static
void
ApplyAdaMax
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
cnnlTensorDescriptor_t
var_desc
,
void
*
var
,
const
cnnlTensorDescriptor_t
m_desc
,
void
*
m
,
const
cnnlTensorDescriptor_t
v_desc
,
void
*
v
,
const
void
*
diff
,
const
void
*
lr
,
const
void
*
beta1
,
const
void
*
beta2
,
const
void
*
beta1_power
,
const
cnnlTensorDescriptor_t
var_desc
,
void
*
var
,
const
cnnlTensorDescriptor_t
m_desc
,
void
*
m
,
const
cnnlTensorDescriptor_t
v_desc
,
void
*
v
,
const
void
*
diff
,
const
void
*
lr
,
const
void
*
beta1
,
const
void
*
beta2
,
const
void
*
beta1_power
,
const
void
*
epsilon
);
static
void
ApplyMomentum
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
bool
use_nesterov
,
const
void
*
lr
,
const
void
*
momentum
,
void
*
var
,
const
void
*
grad
,
const
bool
use_nesterov
,
const
void
*
lr
,
const
void
*
momentum
,
void
*
var
,
void
*
accum
);
static
void
ApplyKerasMomentum
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
bool
use_nesterov
,
const
void
*
lr
,
const
void
*
momentum
,
void
*
var
,
void
*
accum
);
const
void
*
grad
,
const
bool
use_nesterov
,
const
void
*
lr
,
const
void
*
momentum
,
void
*
var
,
void
*
accum
);
static
void
ApplyAdadelta
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
diff
,
const
void
*
lr
,
const
void
*
rho
,
const
void
*
epsilon
,
void
*
var
,
void
*
accum
,
const
void
*
diff
,
const
void
*
lr
,
const
void
*
rho
,
const
void
*
epsilon
,
void
*
var
,
void
*
accum
,
void
*
accum_update
);
static
void
SparseSoftmaxXentWithLogits
(
const
ExecutionContext
&
ctx
,
cnnlSoftmaxMode_t
mode
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
label_desc
,
const
void
*
label
,
const
cnnlTensorDescriptor_t
y_desc
,
void
*
output
,
const
cnnlTensorDescriptor_t
diff_y_desc
,
void
*
back_out
);
static
void
RandomUniform
(
const
ExecutionContext
&
ctx
,
const
int
num
,
const
ExecutionContext
&
ctx
,
cnnlSoftmaxMode_t
mode
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
label_desc
,
const
void
*
label
,
const
cnnlTensorDescriptor_t
y_desc
,
void
*
output
,
const
cnnlTensorDescriptor_t
diff_y_desc
,
void
*
back_out
);
static
void
RandomUniform
(
const
ExecutionContext
&
ctx
,
const
int
num
,
const
cnnlDataType_t
data_type
,
const
cnnlRandGenerator_t
mlu_generator
,
void
*
mlu_state
,
void
*
output
);
void
*
mlu_state
,
void
*
output
);
static
void
FusedDropout
(
const
ExecutionContext
&
ctx
,
const
cnnlRandGenerator_t
generator
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
float
p
,
void
*
state
,
const
cnnlTensorDescriptor_t
mask_desc
,
const
void
*
mask
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
FusedDropout
(
const
ExecutionContext
&
ctx
,
const
cnnlRandGenerator_t
generator
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
float
p
,
void
*
state
,
const
cnnlTensorDescriptor_t
mask_desc
,
const
void
*
mask
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Cumsum
(
const
ExecutionContext
&
ctx
,
const
int
axis
,
const
bool
exclusive
,
const
bool
reverse
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
ouput_desc
,
void
*
output
);
static
void
Cumsum
(
const
ExecutionContext
&
ctx
,
const
int
axis
,
const
bool
exclusive
,
const
bool
reverse
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
ouput_desc
,
void
*
output
);
static
void
BroadcastTo
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
...
...
@@ -620,189 +751,267 @@ class MLUCnnl {
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
GatherFunctor
(
const
ExecutionContext
&
ctx
,
const
int
axis
,
const
int
batch_dims
,
const
cnnlTensorDescriptor_t
params_desc
,
const
void
*
params
,
const
cnnlTensorDescriptor_t
indices_desc
,
const
void
*
indices
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
GatherFunctor
(
const
ExecutionContext
&
ctx
,
const
int
axis
,
const
int
batch_dims
,
const
cnnlTensorDescriptor_t
params_desc
,
const
void
*
params
,
const
cnnlTensorDescriptor_t
indices_desc
,
const
void
*
indices
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
ScatterRefFunctor
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
params_desc
,
const
void
*
params
,
const
cnnlTensorDescriptor_t
updates_desc
,
const
void
*
updates
,
const
cnnlTensorDescriptor_t
indices_desc
,
const
void
*
indices
,
const
cnnlScatterRefMode_t
mode
);
static
void
ScatterRefFunctor
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
params_desc
,
const
void
*
params
,
const
cnnlTensorDescriptor_t
updates_desc
,
const
void
*
updates
,
const
cnnlTensorDescriptor_t
indices_desc
,
const
void
*
indices
,
const
cnnlScatterRefMode_t
mode
);
static
void
ScatterFunctor
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
params_desc
,
const
void
*
params
,
void
*
params
,
const
cnnlTensorDescriptor_t
updates_desc
,
const
void
*
updates
,
const
cnnlTensorDescriptor_t
indices_desc
,
const
void
*
indices
,
const
int
dim
,
const
void
*
indices
,
const
int
dim
,
const
cnnlScatterMode_t
mode
=
CNNL_SCATTER
);
static
void
Range
(
const
ExecutionContext
&
ctx
,
const
void
*
start
,
const
void
*
end
,
const
void
*
step
,
const
cnnlDataType_t
output_dtype
,
void
*
output
);
static
void
Range
(
const
ExecutionContext
&
ctx
,
const
void
*
start
,
const
void
*
end
,
const
void
*
step
,
const
cnnlDataType_t
output_dtype
,
void
*
output
);
static
void
Round
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
TopK
(
const
ExecutionContext
&
ctx
,
const
int
k
,
const
int
dim
,
const
bool
largest
,
const
bool
sorted
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
static
void
TopK
(
const
ExecutionContext
&
ctx
,
const
int
k
,
const
int
dim
,
const
bool
largest
,
const
bool
sorted
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
values_output_desc
,
void
*
values_out
,
const
cnnlTensorDescriptor_t
indices_output_desc
,
void
*
indices_out
);
static
void
StridedSlice
(
const
ExecutionContext
&
ctx
,
const
int
begin
[],
const
int
end
[],
const
int
strides
[],
static
void
StridedSlice
(
const
ExecutionContext
&
ctx
,
const
int
begin
[],
const
int
end
[],
const
int
strides
[],
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Split
(
const
ExecutionContext
&
ctx
,
int
split_num
,
int
axis
,
static
void
Split
(
const
ExecutionContext
&
ctx
,
int
split_num
,
int
axis
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input_ptr
,
const
cnnlTensorDescriptor_t
output_descs
[],
void
*
output_ptrs
[]);
static
void
Split
(
const
MLUDeviceContext
&
dev_ctx
,
int
split_num
,
int
axis
,
static
void
Split
(
const
MLUDeviceContext
&
dev_ctx
,
int
split_num
,
int
axis
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input_ptr
,
const
cnnlTensorDescriptor_t
output_descs
[],
void
*
output_ptrs
[]);
static
void
Scale
(
const
ExecutionContext
&
ctx
,
const
int
axis
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
alpha_desc
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
beta_desc
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Scale
(
const
ExecutionContext
&
ctx
,
const
int
axis
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
alpha_desc
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
beta_desc
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
AddN
(
const
ExecutionContext
&
ctx
,
uint32_t
input_num
,
static
void
AddN
(
const
ExecutionContext
&
ctx
,
uint32_t
input_num
,
const
cnnlTensorDescriptor_t
inputs_desc
[],
const
void
*
inputs
[],
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Log
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
cnnlLogBase_t
log_base
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
cnnlComputationPreference_t
prefer
,
cnnlLogBase_t
log_base
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
StridedSliceGrad
(
const
ExecutionContext
&
ctx
,
const
int
begin
[],
const
int
end
[],
const
int
strides
[],
static
void
StridedSliceGrad
(
const
ExecutionContext
&
ctx
,
const
int
begin
[],
const
int
end
[],
const
int
strides
[],
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Logic
(
const
ExecutionContext
&
ctx
,
const
cnnlLogicOp_t
log_method
,
static
void
Logic
(
const
ExecutionContext
&
ctx
,
const
cnnlLogicOp_t
log_method
,
const
cnnlTensorDescriptor_t
input1_desc
,
const
void
*
input1
,
const
cnnlTensorDescriptor_t
input2_desc
,
const
void
*
input2
,
const
cnnlTensorDescriptor_t
ouput_desc
,
const
void
*
input2
,
const
cnnlTensorDescriptor_t
ouput_desc
,
void
*
output
);
static
void
Select
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
condition_desc
,
const
void
*
condition_ptr
,
const
cnnlTensorDescriptor_t
then_desc
,
const
void
*
then_ptr
,
const
cnnlTensorDescriptor_t
else_desc
,
const
void
*
else_ptr
,
const
cnnlTensorDescriptor_t
output_desc
,
static
void
Select
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
condition_desc
,
const
void
*
condition_ptr
,
const
cnnlTensorDescriptor_t
then_desc
,
const
void
*
then_ptr
,
const
cnnlTensorDescriptor_t
else_desc
,
const
void
*
else_ptr
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output_ptr
);
static
void
AssignAdd
(
const
ExecutionContext
&
ctx
,
const
void
*
alpha
,
static
void
AssignAdd
(
const
ExecutionContext
&
ctx
,
const
void
*
alpha
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
update_desc
,
const
void
*
update
,
const
cnnlTensorDescriptor_t
param_desc
,
void
*
param
);
const
cnnlTensorDescriptor_t
param_desc
,
void
*
param
);
static
void
AssignSub
(
const
ExecutionContext
&
ctx
,
const
void
*
alpha
,
static
void
AssignSub
(
const
ExecutionContext
&
ctx
,
const
void
*
alpha
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
update_desc
,
const
void
*
update
,
const
cnnlTensorDescriptor_t
param_desc
,
void
*
param
);
const
cnnlTensorDescriptor_t
param_desc
,
void
*
param
);
static
void
Assign
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
update_desc
,
const
void
*
update
,
const
cnnlTensorDescriptor_t
param_desc
,
void
*
param
);
const
cnnlTensorDescriptor_t
param_desc
,
void
*
param
);
static
void
GatherNd
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
params_desc
,
const
void
*
params
,
const
cnnlTensorDescriptor_t
indices_desc
,
const
void
*
indices
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
BatchToSpace
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
,
const
cnnlSpaceBatchParam_t
param
);
void
*
output
,
const
cnnlSpaceBatchParam_t
param
);
static
void
BatchToSpaceNd
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
cnnlSpaceBatchNdDescriptor_t
param
,
void
*
extra_device_input
,
size_t
extra_input_size
,
void
*
extra_device_input
,
size_t
extra_input_size
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
PoolingForward
(
const
ExecutionContext
&
ctx
,
cnnlPoolingMode_t
pool_mode
,
int64_t
output_h
,
int64_t
output_w
,
cnnlPoolingDescriptor_t
pooling_desc
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
void
*
beta
,
const
void
*
extra_input_ptr
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
PoolingForward
(
const
ExecutionContext
&
ctx
,
cnnlPoolingMode_t
pool_mode
,
int64_t
output_h
,
int64_t
output_w
,
cnnlPoolingDescriptor_t
pooling_desc
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
void
*
beta
,
const
void
*
extra_input_ptr
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
AdaptivePoolingForward
(
const
ExecutionContext
&
ctx
,
cnnlPoolingMode_t
pool_mode
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
,
const
cnnlTensorDescriptor_t
index_desc
,
void
*
index
);
static
void
AdaptivePoolingForward
(
const
ExecutionContext
&
ctx
,
cnnlPoolingMode_t
pool_mode
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
,
const
cnnlTensorDescriptor_t
index_desc
,
void
*
index
);
static
void
Pool3D
(
const
ExecutionContext
&
ctx
,
cnnlPoolingMode_t
pool_mode
,
static
void
Pool3D
(
const
ExecutionContext
&
ctx
,
cnnlPoolingMode_t
pool_mode
,
const
std
::
vector
<
int64_t
>&
output_shape
,
cnnlPoolingDescriptor_t
pooling_desc
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
output_desc
,
cnnlPoolingDescriptor_t
pooling_desc
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Pad
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
void
*
paddings
,
const
void
*
padding_value
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
void
*
paddings
,
const
void
*
padding_value
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Matmul
(
const
ExecutionContext
&
ctx
,
const
bool
transpose_a
,
static
void
Matmul
(
const
ExecutionContext
&
ctx
,
const
bool
transpose_a
,
const
bool
transpose_b
,
const
cnnlTensorDescriptor_t
in0_desc
,
const
void
*
in0
,
const
cnnlTensorDescriptor_t
in1_desc
,
const
void
*
in1
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
in0_desc
,
const
void
*
in0
,
const
cnnlTensorDescriptor_t
in1_desc
,
const
void
*
in1
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
BatchMatmul
(
const
ExecutionContext
&
ctx
,
const
bool
transpose_a
,
const
bool
transpose_b
,
const
cnnlTensorDescriptor_t
in0_desc
,
const
void
*
in0
,
const
cnnlTensorDescriptor_t
in1_desc
,
const
void
*
in1
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
BatchMatmul
(
const
ExecutionContext
&
ctx
,
const
bool
transpose_a
,
const
bool
transpose_b
,
const
cnnlTensorDescriptor_t
in0_desc
,
const
void
*
in0
,
const
cnnlTensorDescriptor_t
in1_desc
,
const
void
*
in1
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
MulAx
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
alpha_desc
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
alpha_desc
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
OpTensor
(
const
ExecutionContext
&
ctx
,
const
cnnlOpTensorDescriptor_t
op_tensor_desc
,
const
cnnlTensorDescriptor_t
a_desc
,
const
void
*
a
,
const
cnnlTensorDescriptor_t
b_desc
,
const
void
*
b
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
,
const
cnnlTensorDescriptor_t
a_desc
,
const
void
*
a
,
const
cnnlTensorDescriptor_t
b_desc
,
const
void
*
b
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
,
const
cnnlDataType_t
dtype
,
const
float
alpha1_float
=
1.
f
,
const
float
alpha2_float
=
1.
f
,
const
float
beta_float
=
0.
f
);
static
void
BiasAddGrad
(
const
ExecutionContext
&
ctx
,
const
int
axis
,
static
void
BiasAddGrad
(
const
ExecutionContext
&
ctx
,
const
int
axis
,
const
cnnlTensorDescriptor_t
out_backprop_desc
,
const
void
*
out_backprop
,
const
cnnlTensorDescriptor_t
output_desc
,
...
...
@@ -810,9 +1019,13 @@ class MLUCnnl {
static
void
OneHot
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
desc_indices
,
const
void
*
indices
,
const
int
depth
,
const
void
*
on_value
,
const
void
*
off_value
,
const
int
axis
,
cnnlDataType_t
output_data_type
,
void
*
output
);
const
void
*
indices
,
const
int
depth
,
const
void
*
on_value
,
const
void
*
off_value
,
const
int
axis
,
cnnlDataType_t
output_data_type
,
void
*
output
);
static
void
NonMaxSuppression
(
const
ExecutionContext
&
ctx
,
const
cnnlNmsDescriptor_t
nms_desc
,
...
...
@@ -821,35 +1034,47 @@ class MLUCnnl {
const
cnnlTensorDescriptor_t
confidence_desc
,
const
void
*
confidence
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
,
void
*
output_size
);
void
*
output
,
void
*
output_size
);
static
void
SoftmaxCrossEntropyWithLogits
(
const
ExecutionContext
&
ctx
,
cnnlSoftmaxMode_t
mode
,
const
ExecutionContext
&
ctx
,
cnnlSoftmaxMode_t
mode
,
cnnlComputationPreference_t
prefer
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
logits_in
,
const
cnnlTensorDescriptor_t
label_desc
,
const
void
*
labels_in
,
const
cnnlTensorDescriptor_t
loss_out_desc
,
void
*
loss_out
,
const
cnnlTensorDescriptor_t
back_out_desc
,
void
*
back_out
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
logits_in
,
const
cnnlTensorDescriptor_t
label_desc
,
const
void
*
labels_in
,
const
cnnlTensorDescriptor_t
loss_out_desc
,
void
*
loss_out
,
const
cnnlTensorDescriptor_t
back_out_desc
,
void
*
back_out
);
static
void
SoftmaxForward
(
const
ExecutionContext
&
ctx
,
cnnlSoftmaxAlgorithm_t
algorithm
,
cnnlSoftmaxMode_t
mode
,
const
void
*
alpha
,
cnnlSoftmaxMode_t
mode
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
void
*
beta
,
const
void
*
input
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
SoftmaxBackward
(
const
ExecutionContext
&
ctx
,
cnnlSoftmaxAlgorithm_t
algorithm
,
cnnlSoftmaxMode_t
mode
,
const
cnnlTensorDescriptor_t
y_desc
,
const
void
*
y
,
const
cnnlTensorDescriptor_t
diff_y_desc
,
const
void
*
diff_y
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
static
void
SoftmaxBackward
(
const
ExecutionContext
&
ctx
,
cnnlSoftmaxAlgorithm_t
algorithm
,
cnnlSoftmaxMode_t
mode
,
const
cnnlTensorDescriptor_t
y_desc
,
const
void
*
y
,
const
cnnlTensorDescriptor_t
diff_y_desc
,
const
void
*
diff_y
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
void
*
diff_x
);
static
void
Softplus
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
features_desc
,
const
void
*
features
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
SoftplusGrad
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
gradients_desc
,
...
...
@@ -860,38 +1085,59 @@ class MLUCnnl {
void
*
output
);
static
void
RsqrtGrad
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
data_desc
,
const
void
*
y
,
const
void
*
diff_y
,
void
*
output
);
const
cnnlTensorDescriptor_t
data_desc
,
const
void
*
y
,
const
void
*
diff_y
,
void
*
output
);
static
void
SqrtGrad
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
data_desc
,
const
void
*
y
,
const
void
*
diff_y
,
void
*
output
);
static
void
ConvolutionForward
(
const
ExecutionContext
&
ctx
,
cnnlConvolutionDescriptor_t
conv_desc_
,
const
void
*
alpha
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
bias_desc
,
const
void
*
bias_ptr
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
filtet_desc
,
const
void
*
filter
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
data_desc
,
const
void
*
y
,
const
void
*
diff_y
,
void
*
output
);
static
void
ConvolutionForward
(
const
ExecutionContext
&
ctx
,
cnnlConvolutionDescriptor_t
conv_desc_
,
const
void
*
alpha
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
bias_desc
,
const
void
*
bias_ptr
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
filtet_desc
,
const
void
*
filter
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
FusedConvBNQuantify
(
const
ExecutionContext
&
ctx
,
cnnlConvolutionDescriptor_t
conv_desc
,
const
void
*
epsilon_ptr
,
const
int
fused_ops_number
,
const
cnnlDataType_t
tensor_dtype
,
const
int
input_position
,
const
float
input_scale
,
const
int
filter_position
,
const
float
filter_scale
,
const
cnnlTensorDescriptor_t
scale_desc
,
const
void
*
scale_ptr
,
const
cnnlTensorDescriptor_t
offset_desc
,
const
void
*
offset_ptr
,
const
cnnlTensorDescriptor_t
mean_desc
,
const
void
*
mean_ptr
,
const
cnnlTensorDescriptor_t
variance_desc
,
const
void
*
variance_ptr
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
filtet_desc
,
const
void
*
filter
,
const
cnnlTensorDescriptor_t
output_desc
,
static
void
FusedConvBNQuantify
(
const
ExecutionContext
&
ctx
,
cnnlConvolutionDescriptor_t
conv_desc
,
const
void
*
epsilon_ptr
,
const
int
fused_ops_number
,
const
cnnlDataType_t
tensor_dtype
,
const
int
input_position
,
const
float
input_scale
,
const
int
filter_position
,
const
float
filter_scale
,
const
cnnlTensorDescriptor_t
scale_desc
,
const
void
*
scale_ptr
,
const
cnnlTensorDescriptor_t
offset_desc
,
const
void
*
offset_ptr
,
const
cnnlTensorDescriptor_t
mean_desc
,
const
void
*
mean_ptr
,
const
cnnlTensorDescriptor_t
variance_desc
,
const
void
*
variance_ptr
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
filtet_desc
,
const
void
*
filter
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Tile
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
UnsortedSegmentSum
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
data_desc
,
...
...
@@ -901,12 +1147,17 @@ class MLUCnnl {
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Reduce
(
const
ExecutionContext
&
ctx
,
const
bool
need_workspace
,
static
void
Reduce
(
const
ExecutionContext
&
ctx
,
const
bool
need_workspace
,
const
cnnlReduceDescriptor_t
reduction_desc
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
size_t
indices_size
,
void
*
indices
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
size_t
indices_size
,
void
*
indices
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
FloorDiv
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
...
...
@@ -914,34 +1165,41 @@ class MLUCnnl {
const
void
*
input1
,
const
cnnlTensorDescriptor_t
input2_desc
,
const
void
*
input2
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
FloorMod
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input1_desc
,
const
void
*
input1
,
const
cnnlTensorDescriptor_t
input2_desc
,
const
void
*
input2
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Maximum
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input1_desc
,
const
void
*
input1
,
const
cnnlTensorDescriptor_t
input2_desc
,
const
void
*
input2
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Minimum
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input1_desc
,
const
void
*
input1
,
const
cnnlTensorDescriptor_t
input2_desc
,
const
void
*
input2
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
PowR
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
const
cnnlTensorDescriptor_t
input1_desc
,
const
void
*
input1
,
const
cnnlTensorDescriptor_t
input2_desc
,
const
void
*
input2
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input1_desc
,
const
void
*
input1
,
const
cnnlTensorDescriptor_t
input2_desc
,
const
void
*
input2
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
DivNoNan
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
...
...
@@ -949,7 +1207,8 @@ class MLUCnnl {
const
void
*
input1
,
const
cnnlTensorDescriptor_t
input2_desc
,
const
void
*
input2
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
SquaredDifference
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input1_desc
,
...
...
@@ -960,52 +1219,73 @@ class MLUCnnl {
void
*
output
);
static
void
L2Loss
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
void
*
output
);
static
void
Abs
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Neg
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Floor
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Ceil
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
IsNan
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Square
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Sqrt
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Rsqrt
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Cos
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Sin
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
TrigonForward
(
const
ExecutionContext
&
ctx
,
const
cnnlTrigonDescriptor_t
trigon_desc
,
...
...
@@ -1016,31 +1296,41 @@ class MLUCnnl {
static
void
Exp
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Sign
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
IsFinite
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
IsNanInf
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
void
*
output
);
const
void
*
input
,
void
*
output
);
static
void
Erf
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Log1p
(
const
ExecutionContext
&
ctx
,
cnnlComputationPreference_t
prefer
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
LogicalNot
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
...
...
@@ -1048,152 +1338,250 @@ class MLUCnnl {
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
DynamicStitch
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
*
indices_desc
,
const
int
**
indices
,
const
cnnlTensorDescriptor_t
*
data_desc
,
const
void
**
data
,
const
int
size
,
int
*
indices_dims
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
DynamicStitch
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
*
indices_desc
,
const
int
**
indices
,
const
cnnlTensorDescriptor_t
*
data_desc
,
const
void
**
data
,
const
int
size
,
int
*
indices_dims
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
CropAndResize
(
const
ExecutionContext
&
ctx
,
const
std
::
string
method_name
,
const
float
extrapolation_value
,
const
cnnlTensorDescriptor_t
image_desc
,
const
void
*
image
,
const
cnnlTensorDescriptor_t
boxes_desc
,
const
void
*
boxes
,
const
cnnlTensorDescriptor_t
box_index_desc
,
const
void
*
box_index
,
const
cnnlTensorDescriptor_t
output_desc
,
static
void
CropAndResize
(
const
ExecutionContext
&
ctx
,
const
std
::
string
method_name
,
const
float
extrapolation_value
,
const
cnnlTensorDescriptor_t
image_desc
,
const
void
*
image
,
const
cnnlTensorDescriptor_t
boxes_desc
,
const
void
*
boxes
,
const
cnnlTensorDescriptor_t
box_index_desc
,
const
void
*
box_index
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
CropAndResizeBackwardImage
(
const
ExecutionContext
&
ctx
,
const
std
::
string
method_name
,
const
cnnlTensorDescriptor_t
image_desc
,
const
void
*
image
,
const
cnnlTensorDescriptor_t
boxes_desc
,
const
void
*
boxes
,
const
cnnlTensorDescriptor_t
box_idx_desc
,
const
void
*
box_idx
,
const
cnnlTensorDescriptor_t
grads_image_desc
,
void
*
grads_image
);
const
ExecutionContext
&
ctx
,
const
std
::
string
method_name
,
const
cnnlTensorDescriptor_t
image_desc
,
const
void
*
image
,
const
cnnlTensorDescriptor_t
boxes_desc
,
const
void
*
boxes
,
const
cnnlTensorDescriptor_t
box_idx_desc
,
const
void
*
box_idx
,
const
cnnlTensorDescriptor_t
grads_image_desc
,
void
*
grads_image
);
static
void
CropAndResizeBackwardBoxes
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
image_desc
,
const
void
*
image
,
const
cnnlTensorDescriptor_t
boxes_desc
,
const
void
*
boxes
,
const
cnnlTensorDescriptor_t
box_idx_desc
,
const
void
*
box_idx
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
PoolingBackward
(
const
ExecutionContext
&
ctx
,
const
cnnlPoolingDescriptor_t
pooling_desc
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
y_desc
,
const
void
*
y
,
const
cnnlTensorDescriptor_t
diff_y_desc
,
const
void
*
diff_y
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
void
*
diff_x
);
static
void
AdaptivePoolingBackward
(
const
ExecutionContext
&
ctx
,
const
cnnlPoolingMode_t
pool_mode
,
const
cnnlTensorDescriptor_t
y_desc
,
const
void
*
y
,
const
cnnlTensorDescriptor_t
index_desc
,
const
void
*
index
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
void
*
diff_x
);
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
image_desc
,
const
void
*
image
,
const
cnnlTensorDescriptor_t
boxes_desc
,
const
void
*
boxes
,
const
cnnlTensorDescriptor_t
box_idx_desc
,
const
void
*
box_idx
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
PoolingBackward
(
const
ExecutionContext
&
ctx
,
const
cnnlPoolingDescriptor_t
pooling_desc
,
const
void
*
alpha
,
const
cnnlTensorDescriptor_t
y_desc
,
const
void
*
y
,
const
cnnlTensorDescriptor_t
diff_y_desc
,
const
void
*
diff_y
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
void
*
diff_x
);
static
void
AdaptivePoolingBackward
(
const
ExecutionContext
&
ctx
,
const
cnnlPoolingMode_t
pool_mode
,
const
cnnlTensorDescriptor_t
y_desc
,
const
void
*
y
,
const
cnnlTensorDescriptor_t
index_desc
,
const
void
*
index
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
void
*
diff_x
);
static
void
PoolingIndex
(
const
ExecutionContext
&
ctx
,
const
cnnlPoolingDescriptor_t
pooling_desc
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
y_desc
,
void
*
y
);
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
y_desc
,
void
*
y
);
static
void
SpaceToBatch
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
,
const
int64_t
block_shape
[]);
void
*
output
,
const
int64_t
block_shape
[]);
static
void
SpaceToBatchNd
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
cnnlSpaceBatchNdDescriptor_t
param
,
void
*
extra_device_input
,
size_t
extra_input_size
,
void
*
extra_device_input
,
size_t
extra_input_size
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Interp
(
const
ExecutionContext
&
ctx
,
const
cnnlInterpMode_t
mode
,
const
bool
align_corners
,
const
bool
half_pixel_centers
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Interp
(
const
ExecutionContext
&
ctx
,
const
cnnlInterpMode_t
mode
,
const
bool
align_corners
,
const
bool
half_pixel_centers
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
InterpBackward
(
const
ExecutionContext
&
ctx
,
const
cnnlInterpBackwardMode_t
mode
,
const
bool
align_corners
,
const
bool
half_pixel_centers
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
InterpBackward
(
const
ExecutionContext
&
ctx
,
const
cnnlInterpBackwardMode_t
mode
,
const
bool
align_corners
,
const
bool
half_pixel_centers
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
QuantizeParam
(
const
ExecutionContext
&
ctx
,
const
cnnlQuantizeMode_t
mode
,
const
int
bitwidth
,
const
cnnlQuantizeMode_t
mode
,
const
int
bitwidth
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
void
*
position
,
void
*
scale
,
const
void
*
input
,
void
*
position
,
void
*
scale
,
void
*
offset
);
static
void
QuantizeMatMul
(
const
ExecutionContext
&
ctx
,
const
bool
transpose_a
,
const
bool
transpose_b
,
const
cnnlTensorDescriptor_t
a_desc
,
const
void
*
a
,
const
void
*
a_position
,
const
void
*
a_scale
,
const
void
*
a_offset
,
const
cnnlTensorDescriptor_t
b_desc
,
const
void
*
b
,
const
void
*
b_position
,
const
void
*
b_scale
,
const
void
*
b_offset
,
const
cnnlDataType_t
quant_type
,
const
cnnlDataType_t
data_type
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
QuantizeMatMul
(
const
ExecutionContext
&
ctx
,
const
bool
transpose_a
,
const
bool
transpose_b
,
const
cnnlTensorDescriptor_t
a_desc
,
const
void
*
a
,
const
void
*
a_position
,
const
void
*
a_scale
,
const
void
*
a_offset
,
const
cnnlTensorDescriptor_t
b_desc
,
const
void
*
b
,
const
void
*
b_position
,
const
void
*
b_scale
,
const
void
*
b_offset
,
const
cnnlDataType_t
quant_type
,
const
cnnlDataType_t
data_type
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
QuantizeBatchMatMul
(
const
ExecutionContext
&
ctx
,
const
bool
adj_x
,
const
bool
adj_y
,
const
cnnlTensorDescriptor_t
a_desc
,
const
void
*
a
,
const
void
*
a_position
,
const
void
*
a_scale
,
const
void
*
a_offset
,
const
cnnlTensorDescriptor_t
b_desc
,
const
void
*
b
,
const
void
*
b_position
,
const
void
*
b_scale
,
const
void
*
b_offset
,
const
cnnlDataType_t
quant_type
,
const
cnnlDataType_t
data_type
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
QuantizeBatchMatMul
(
const
ExecutionContext
&
ctx
,
const
bool
adj_x
,
const
bool
adj_y
,
const
cnnlTensorDescriptor_t
a_desc
,
const
void
*
a
,
const
void
*
a_position
,
const
void
*
a_scale
,
const
void
*
a_offset
,
const
cnnlTensorDescriptor_t
b_desc
,
const
void
*
b
,
const
void
*
b_position
,
const
void
*
b_scale
,
const
void
*
b_offset
,
const
cnnlDataType_t
quant_type
,
const
cnnlDataType_t
data_type
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
QuantizeBatchMatMulBCast
(
const
ExecutionContext
&
ctx
,
const
bool
adj_x
,
const
bool
adj_y
,
const
cnnlTensorDescriptor_t
a_desc
,
const
void
*
a
,
const
void
*
a_position
,
const
void
*
a_scale
,
const
void
*
a_offset
,
const
cnnlTensorDescriptor_t
b_desc
,
const
void
*
b
,
const
void
*
b_position
,
const
void
*
b_scale
,
const
void
*
b_offset
,
const
cnnlDataType_t
quant_type
,
const
cnnlDataType_t
data_type
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
QuantizeBatchMatMulBCast
(
const
ExecutionContext
&
ctx
,
const
bool
adj_x
,
const
bool
adj_y
,
const
cnnlTensorDescriptor_t
a_desc
,
const
void
*
a
,
const
void
*
a_position
,
const
void
*
a_scale
,
const
void
*
a_offset
,
const
cnnlTensorDescriptor_t
b_desc
,
const
void
*
b
,
const
void
*
b_position
,
const
void
*
b_scale
,
const
void
*
b_offset
,
const
cnnlDataType_t
quant_type
,
const
cnnlDataType_t
data_type
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
FusedBatchNorm
(
const
ExecutionContext
&
ctx
,
const
bool
is_training
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
scale_desc
,
const
void
*
scale
,
const
void
*
offset
,
const
void
*
estimated_mean
,
const
void
*
estimated_variance
,
float
epsilon
,
float
momentum
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
,
void
*
batch_mean
,
void
*
batch_var
,
void
*
saved_mean
,
void
*
saved_var
);
static
void
FusedBatchNormGrad
(
const
ExecutionContext
&
ctx
,
const
bool
is_training
,
const
cnnlTensorDescriptor_t
y_backprop_desc
,
const
void
*
y_backprop
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
scale_desc
,
const
void
*
scale
,
const
void
*
saved_mean
,
const
void
*
saved_var
,
float
epsilon
,
const
cnnlTensorDescriptor_t
x_backprop_desc
,
void
*
x_backprop
,
void
*
scale_backprop
,
void
*
offset_backprop
);
static
void
LayerNormForward
(
const
ExecutionContext
&
ctx
,
int
axis
,
static
void
FusedBatchNorm
(
const
ExecutionContext
&
ctx
,
const
bool
is_training
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
scale_desc
,
const
void
*
scale
,
const
void
*
offset
,
const
void
*
estimated_mean
,
const
void
*
estimated_variance
,
float
epsilon
,
float
momentum
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
,
void
*
batch_mean
,
void
*
batch_var
,
void
*
saved_mean
,
void
*
saved_var
);
static
void
FusedBatchNormGrad
(
const
ExecutionContext
&
ctx
,
const
bool
is_training
,
const
cnnlTensorDescriptor_t
y_backprop_desc
,
const
void
*
y_backprop
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
scale_desc
,
const
void
*
scale
,
const
void
*
saved_mean
,
const
void
*
saved_var
,
float
epsilon
,
const
cnnlTensorDescriptor_t
x_backprop_desc
,
void
*
x_backprop
,
void
*
scale_backprop
,
void
*
offset_backprop
);
static
void
LayerNormForward
(
const
ExecutionContext
&
ctx
,
int
axis
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
weight_bias_desc
,
const
void
*
weight
,
const
void
*
bias
,
float
eps
,
const
cnnlTensorDescriptor_t
y_desc
,
void
*
y
,
const
void
*
weight
,
const
void
*
bias
,
float
eps
,
const
cnnlTensorDescriptor_t
y_desc
,
void
*
y
,
const
cnnlTensorDescriptor_t
mean_rstd_desc
,
void
*
saved_mean
,
void
*
saved_rstd
);
void
*
saved_mean
,
void
*
saved_rstd
);
static
void
LayerNormBackward
(
const
ExecutionContext
&
ctx
,
int
axis
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
diff_z_desc
,
const
void
*
diff_z
,
const
cnnlTensorDescriptor_t
weight_bias_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
mean_rstd_desc
,
const
void
*
saved_mean
,
const
void
*
saved_rstd
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
void
*
diff_x
,
void
*
diff_weight
,
void
*
diff_bias
);
static
void
LayerNormBackward
(
const
ExecutionContext
&
ctx
,
int
axis
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlTensorDescriptor_t
diff_z_desc
,
const
void
*
diff_z
,
const
cnnlTensorDescriptor_t
weight_bias_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
mean_rstd_desc
,
const
void
*
saved_mean
,
const
void
*
saved_rstd
,
const
cnnlTensorDescriptor_t
diff_x_desc
,
void
*
diff_x
,
void
*
diff_weight
,
void
*
diff_bias
);
static
void
Transpose
(
const
ExecutionContext
&
ctx
,
const
std
::
vector
<
int
>
perm
,
const
int
input_dim
,
const
std
::
vector
<
int
>
perm
,
const
int
input_dim
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
TrilTriu
(
const
ExecutionContext
&
ctx
,
const
int
diagonal_k
,
const
bool
tri_up_mode
,
...
...
@@ -1203,109 +1591,170 @@ class MLUCnnl {
static
void
MatrixBandPart
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
data_desc
,
const
void
*
input
,
const
int
num_lower
,
const
int
num_upper
,
void
*
output
);
const
void
*
input
,
const
int
num_lower
,
const
int
num_upper
,
void
*
output
);
static
void
NumTrue
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
Tensor
index
,
uint32_t
*
num_true
);
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
Tensor
index
,
uint32_t
*
num_true
);
static
void
Where
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
uint32_t
*
strides
,
const
uint32_t
*
index
,
const
cnnlTensorDescriptor_t
y_desc
,
int
*
y
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
uint32_t
*
strides
,
const
uint32_t
*
index
,
const
cnnlTensorDescriptor_t
y_desc
,
int
*
y
,
const
bool
as_tuple
);
static
void
Conv2D
(
const
ExecutionContext
&
ctx
,
const
cnnlConvolutionDescriptor_t
conv_desc
,
const
cnnlDataType_t
tensor_dtype
,
const
cnnlDataType_t
dt_onchip
,
const
void
*
input_position
,
const
void
*
input_scale
,
const
void
*
input_offset
,
const
void
*
filter_position
,
const
void
*
filter_scale
,
const
cnnlDataType_t
dt_onchip
,
const
void
*
input_position
,
const
void
*
input_scale
,
const
void
*
input_offset
,
const
void
*
filter_position
,
const
void
*
filter_scale
,
const
void
*
filter_offset
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
filter_desc
,
const
void
*
filter
,
const
cnnlTensorDescriptor_t
bias_desc
,
const
void
*
bias
,
const
cnnlTensorDescriptor_t
output_desc
,
const
void
*
filter
,
const
cnnlTensorDescriptor_t
bias_desc
,
const
void
*
bias
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
ConvBackpropInput
(
const
ExecutionContext
&
ctx
,
const
cnnlConvolutionDescriptor_t
conv_desc
,
const
cnnlTensorDescriptor_t
filter_desc
,
const
void
*
filter
,
const
cnnlTensorDescriptor_t
out_backprop_desc
,
const
void
*
out_backprop
,
const
cnnlTensorDescriptor_t
in_backprop_desc
,
void
*
in_backprop
);
static
void
ConvBackpropInput
(
const
ExecutionContext
&
ctx
,
const
cnnlConvolutionDescriptor_t
conv_desc
,
const
cnnlTensorDescriptor_t
filter_desc
,
const
void
*
filter
,
const
cnnlTensorDescriptor_t
out_backprop_desc
,
const
void
*
out_backprop
,
const
cnnlTensorDescriptor_t
in_backprop_desc
,
void
*
in_backprop
);
static
void
QuantizeConvBackpropInput
(
const
ExecutionContext
&
ctx
,
const
cnnlConvolutionDescriptor_t
conv_desc
,
const
cnnlDataType_t
tensor_dtype
,
const
cnnlDataType_t
dt_onchip
,
const
void
*
filter_position
,
const
void
*
filter_scale
,
const
void
*
filter_offset
,
const
void
*
out_backprop_position
,
const
void
*
out_backprop_scale
,
const
void
*
out_backprop_offset
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
filter
,
const
cnnlTensorDescriptor_t
out_backprop_desc
,
const
void
*
out_backprop
,
const
cnnlTensorDescriptor_t
in_backprop_desc
,
void
*
in_backprop
);
const
ExecutionContext
&
ctx
,
const
cnnlConvolutionDescriptor_t
conv_desc
,
const
cnnlDataType_t
tensor_dtype
,
const
cnnlDataType_t
dt_onchip
,
const
void
*
filter_position
,
const
void
*
filter_scale
,
const
void
*
filter_offset
,
const
void
*
out_backprop_position
,
const
void
*
out_backprop_scale
,
const
void
*
out_backprop_offset
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
filter
,
const
cnnlTensorDescriptor_t
out_backprop_desc
,
const
void
*
out_backprop
,
const
cnnlTensorDescriptor_t
in_backprop_desc
,
void
*
in_backprop
);
static
void
ConvBackpropFilter
(
const
ExecutionContext
&
ctx
,
const
cnnlConvolutionDescriptor_t
conv_desc
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
out_backprop_desc
,
const
void
*
out_backprop
,
const
cnnlTensorDescriptor_t
filter_backprop_desc
,
void
*
filter_backprop
);
const
ExecutionContext
&
ctx
,
const
cnnlConvolutionDescriptor_t
conv_desc
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
out_backprop_desc
,
const
void
*
out_backprop
,
const
cnnlTensorDescriptor_t
filter_backprop_desc
,
void
*
filter_backprop
);
static
void
QuantizeConvBackpropFilter
(
const
ExecutionContext
&
ctx
,
const
cnnlConvolutionDescriptor_t
conv_desc
,
const
cnnlDataType_t
tensor_dtype
,
const
cnnlDataType_t
dt_onchip
,
const
void
*
input_position
,
const
void
*
input_scale
,
const
void
*
input_offset
,
const
void
*
out_backprop_position
,
const
void
*
out_backprop_scale
,
const
void
*
out_backprop_offset
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
out_backprop_desc
,
const
void
*
out_backprop
,
const
cnnlTensorDescriptor_t
filter_backprop_desc
,
void
*
filter_backprop
);
static
void
DCNForward
(
const
ExecutionContext
&
ctx
,
const
cnnlDCNDescriptor_t
dcn_desc
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
offset_desc
,
const
void
*
offset
,
const
cnnlTensorDescriptor_t
mask_desc
,
const
void
*
mask
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
bias_desc
,
const
void
*
bias
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
ExecutionContext
&
ctx
,
const
cnnlConvolutionDescriptor_t
conv_desc
,
const
cnnlDataType_t
tensor_dtype
,
const
cnnlDataType_t
dt_onchip
,
const
void
*
input_position
,
const
void
*
input_scale
,
const
void
*
input_offset
,
const
void
*
out_backprop_position
,
const
void
*
out_backprop_scale
,
const
void
*
out_backprop_offset
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
out_backprop_desc
,
const
void
*
out_backprop
,
const
cnnlTensorDescriptor_t
filter_backprop_desc
,
void
*
filter_backprop
);
static
void
DCNBackwardData
(
const
ExecutionContext
&
ctx
,
const
cnnlDCNDescriptor_t
dcn_desc
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
offset_desc
,
const
void
*
offset
,
const
cnnlTensorDescriptor_t
mask_desc
,
const
void
*
mask
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
grad_output_desc
,
const
void
*
grad_output
,
const
cnnlTensorDescriptor_t
grad_input_desc
,
void
*
grad_input
,
const
cnnlTensorDescriptor_t
grad_offset_desc
,
void
*
grad_offset
,
const
cnnlTensorDescriptor_t
grad_mask_desc
,
void
*
grad_mask
);
static
void
DCNBackwardWeight
(
const
ExecutionContext
&
ctx
,
const
cnnlDCNDescriptor_t
dcn_desc
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
offset_desc
,
const
void
*
offset
,
const
cnnlTensorDescriptor_t
mask_desc
,
const
void
*
mask
,
const
cnnlTensorDescriptor_t
grad_output_desc
,
const
void
*
grad_output
,
const
cnnlTensorDescriptor_t
grad_weight_desc
,
void
*
grad_weight
,
const
cnnlTensorDescriptor_t
grad_bias_desc
,
void
*
grad_bias
);
static
void
DCNForward
(
const
ExecutionContext
&
ctx
,
const
cnnlDCNDescriptor_t
dcn_desc
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
offset_desc
,
const
void
*
offset
,
const
cnnlTensorDescriptor_t
mask_desc
,
const
void
*
mask
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
bias_desc
,
const
void
*
bias
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
DCNBackwardData
(
const
ExecutionContext
&
ctx
,
const
cnnlDCNDescriptor_t
dcn_desc
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
offset_desc
,
const
void
*
offset
,
const
cnnlTensorDescriptor_t
mask_desc
,
const
void
*
mask
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
grad_output_desc
,
const
void
*
grad_output
,
const
cnnlTensorDescriptor_t
grad_input_desc
,
void
*
grad_input
,
const
cnnlTensorDescriptor_t
grad_offset_desc
,
void
*
grad_offset
,
const
cnnlTensorDescriptor_t
grad_mask_desc
,
void
*
grad_mask
);
static
void
DCNBackwardWeight
(
const
ExecutionContext
&
ctx
,
const
cnnlDCNDescriptor_t
dcn_desc
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
offset_desc
,
const
void
*
offset
,
const
cnnlTensorDescriptor_t
mask_desc
,
const
void
*
mask
,
const
cnnlTensorDescriptor_t
grad_output_desc
,
const
void
*
grad_output
,
const
cnnlTensorDescriptor_t
grad_weight_desc
,
void
*
grad_weight
,
const
cnnlTensorDescriptor_t
grad_bias_desc
,
void
*
grad_bias
);
static
void
InTopK
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
predictions_desc
,
const
void
*
predictions
,
const
cnnlTensorDescriptor_t
targets_desc
,
const
void
*
targets
,
const
cnnlTensorDescriptor_t
k_desc
,
const
void
*
k
,
const
int
k_int
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
void
*
targets
,
const
cnnlTensorDescriptor_t
k_desc
,
const
void
*
k
,
const
int
k_int
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
ScatterNd
(
const
ExecutionContext
&
ctx
,
cnnlScatterNdMode_t
mode
,
static
void
ScatterNd
(
const
ExecutionContext
&
ctx
,
cnnlScatterNdMode_t
mode
,
const
cnnlTensorDescriptor_t
indices_desc
,
const
void
*
indices
,
const
cnnlTensorDescriptor_t
updates_desc
,
const
void
*
updates
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
BitWise
(
const
ExecutionContext
&
ctx
,
const
cnnlBitComputeOp_t
optype
,
...
...
@@ -1313,12 +1762,17 @@ class MLUCnnl {
const
void
*
input1
,
const
cnnlTensorDescriptor_t
input2_desc
,
const
void
*
input2
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
QR
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
a_desc
,
const
void
*
a
,
const
cnnlTensorDescriptor_t
q_desc
,
void
*
q
,
const
cnnlTensorDescriptor_t
r_desc
,
void
*
r
,
const
bool
some
);
const
cnnlTensorDescriptor_t
a_desc
,
const
void
*
a
,
const
cnnlTensorDescriptor_t
q_desc
,
void
*
q
,
const
cnnlTensorDescriptor_t
r_desc
,
void
*
r
,
const
bool
some
);
static
void
Reciprocal
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
input_desc
,
...
...
@@ -1326,55 +1780,85 @@ class MLUCnnl {
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
BceLoss
(
const
ExecutionContext
&
ctx
,
const
cnnlBceLossReduction_t
reduction
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
target_desc
,
const
void
*
target
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
BceLoss
(
const
ExecutionContext
&
ctx
,
const
cnnlBceLossReduction_t
reduction
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
target_desc
,
const
void
*
target
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
BceLossBackward
(
const
ExecutionContext
&
ctx
,
const
cnnlBceLossReduction_t
reduction
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
target_desc
,
const
void
*
target
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
BceLossBackward
(
const
ExecutionContext
&
ctx
,
const
cnnlBceLossReduction_t
reduction
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
target_desc
,
const
void
*
target
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
EmbeddingForward
(
const
ExecutionContext
&
ctx
,
const
int
padding_idx
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
indices_desc
,
const
int
*
indices
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
EmbeddingForward
(
const
ExecutionContext
&
ctx
,
const
int
padding_idx
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
indices_desc
,
const
int
*
indices
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
Transform
(
const
ExecutionContext
&
ctx
,
const
void
*
alpha
,
static
void
Transform
(
const
ExecutionContext
&
ctx
,
const
void
*
alpha
,
const
void
*
beta
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
EmbeddingBackward
(
const
ExecutionContext
&
ctx
,
int
padding_idx
,
bool
scale_grad_by_freq
,
const
cnnlTensorDescriptor_t
indices_desc
,
const
void
*
indices
,
const
cnnlTensorDescriptor_t
diff_desc
,
const
void
*
diff
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
EmbeddingBackward
(
const
ExecutionContext
&
ctx
,
int
padding_idx
,
bool
scale_grad_by_freq
,
const
cnnlTensorDescriptor_t
indices_desc
,
const
void
*
indices
,
const
cnnlTensorDescriptor_t
diff_desc
,
const
void
*
diff
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
BceWithLogits
(
const
ExecutionContext
&
ctx
,
cnnlBceWithLogitsReduction_t
reduction
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
target_desc
,
const
void
*
target
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
pos_weight_desc
,
const
void
*
pos_weight
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
BceWithLogits
(
const
ExecutionContext
&
ctx
,
cnnlBceWithLogitsReduction_t
reduction
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
target_desc
,
const
void
*
target
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
pos_weight_desc
,
const
void
*
pos_weight
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
BceWithLogitsBackward
(
const
ExecutionContext
&
ctx
,
cnnlBceWithLogitsReduction_t
reduction
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
target_desc
,
const
void
*
target
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
pos_weight_desc
,
const
void
*
pos_weight
,
const
cnnlTensorDescriptor_t
diff_input_desc
,
void
*
diff_input
);
const
ExecutionContext
&
ctx
,
cnnlBceWithLogitsReduction_t
reduction
,
const
cnnlTensorDescriptor_t
grad_desc
,
const
void
*
grad
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
target_desc
,
const
void
*
target
,
const
cnnlTensorDescriptor_t
weight_desc
,
const
void
*
weight
,
const
cnnlTensorDescriptor_t
pos_weight_desc
,
const
void
*
pos_weight
,
const
cnnlTensorDescriptor_t
diff_input_desc
,
void
*
diff_input
);
};
template
<
typename
T
>
...
...
@@ -1393,22 +1877,27 @@ inline void TransposeFromMLUTensor(const ExecutionContext& ctx,
transformed_output
->
mutable_data
<
T
>
(
framework
::
DDim
(
output_shape
.
data
(),
dim_size
),
ctx
.
GetPlace
());
}
MLUCnnlTensorDesc
trans_in_desc
(
*
transformed_input
,
CNNL_LAYOUT_ARRAY
,
ToCnnlDataType
<
T
>
());
MLUCnnlTensorDesc
trans_out_desc
(
*
transformed_output
,
CNNL_LAYOUT_ARRAY
,
ToCnnlDataType
<
T
>
());
MLUCnnl
::
Transpose
(
ctx
,
perm
,
dim_size
,
trans_in_desc
.
get
(),
GetBasePtr
(
transformed_input
),
trans_out_desc
.
get
(),
MLUCnnlTensorDesc
trans_in_desc
(
*
transformed_input
,
CNNL_LAYOUT_ARRAY
,
ToCnnlDataType
<
T
>
());
MLUCnnlTensorDesc
trans_out_desc
(
*
transformed_output
,
CNNL_LAYOUT_ARRAY
,
ToCnnlDataType
<
T
>
());
MLUCnnl
::
Transpose
(
ctx
,
perm
,
dim_size
,
trans_in_desc
.
get
(),
GetBasePtr
(
transformed_input
),
trans_out_desc
.
get
(),
GetBasePtr
(
transformed_output
));
}
template
<
typename
T
>
inline
void
FillMLUTensorWithHostValue
(
const
ExecutionContext
&
ctx
,
T
value
,
inline
void
FillMLUTensorWithHostValue
(
const
ExecutionContext
&
ctx
,
T
value
,
Tensor
*
out
)
{
MLUCnnlTensorDesc
out_desc
(
*
out
);
MLUCnnl
::
Fill
(
ctx
,
CNNL_POINTER_MODE_HOST
,
&
value
,
out_desc
.
get
(),
GetBasePtr
(
out
));
MLUCnnl
::
Fill
(
ctx
,
CNNL_POINTER_MODE_HOST
,
&
value
,
out_desc
.
get
(),
GetBasePtr
(
out
));
}
}
// namespace operators
...
...
paddle/fluid/operators/scatter_op_mlu.cc
浏览文件 @
0d17c047
...
...
@@ -33,26 +33,43 @@ class ScatterMLUKernel : public framework::OpKernel<T> {
cnnlScatterRefMode_t
mode
;
if
(
overwrite
)
{
mode
=
CNNL_SCATTERREF_UPDATE
;
MLUCnnl
::
ScatterFunctor
(
ctx
,
x_desc
.
get
(),
GetBasePtr
(
x
),
updates_desc
.
get
(),
GetBasePtr
(
updates
),
indices_desc
.
get
(),
GetBasePtr
(
indices
),
mode
);
MLUCnnl
::
ScatterRefFunctor
(
ctx
,
x_desc
.
get
(),
GetBasePtr
(
x
),
updates_desc
.
get
(),
GetBasePtr
(
updates
),
indices_desc
.
get
(),
GetBasePtr
(
indices
),
mode
);
}
else
{
Tensor
tensor_zeros
(
updates
->
type
());
tensor_zeros
.
mutable_data
<
T
>
(
updates
->
dims
(),
ctx
.
GetPlace
());
MLUCnnlTensorDesc
tensor_zeros_desc
(
tensor_zeros
);
float
value
=
0.0
;
auto
value_t
=
static_cast
<
T
>
(
value
);
MLUCnnl
::
Fill
(
ctx
,
CNNL_POINTER_MODE_HOST
,
&
value_t
,
tensor_zeros_desc
.
get
(),
GetBasePtr
(
&
tensor_zeros
));
MLUCnnl
::
Fill
(
ctx
,
CNNL_POINTER_MODE_HOST
,
&
value_t
,
tensor_zeros_desc
.
get
(),
GetBasePtr
(
&
tensor_zeros
));
mode
=
CNNL_SCATTERREF_UPDATE
;
MLUCnnl
::
ScatterFunctor
(
ctx
,
x_desc
.
get
(),
GetBasePtr
(
x
),
MLUCnnl
::
ScatterRefFunctor
(
ctx
,
x_desc
.
get
(),
GetBasePtr
(
x
),
tensor_zeros_desc
.
get
(),
GetBasePtr
(
&
tensor_zeros
),
indices_desc
.
get
(),
GetBasePtr
(
indices
),
mode
);
GetBasePtr
(
&
tensor_zeros
),
indices_desc
.
get
(),
GetBasePtr
(
indices
),
mode
);
mode
=
CNNL_SCATTERREF_ADD
;
MLUCnnl
::
ScatterFunctor
(
ctx
,
x_desc
.
get
(),
GetBasePtr
(
x
),
updates_desc
.
get
(),
GetBasePtr
(
updates
),
indices_desc
.
get
(),
GetBasePtr
(
indices
),
mode
);
MLUCnnl
::
ScatterRefFunctor
(
ctx
,
x_desc
.
get
(),
GetBasePtr
(
x
),
updates_desc
.
get
(),
GetBasePtr
(
updates
),
indices_desc
.
get
(),
GetBasePtr
(
indices
),
mode
);
}
paddle
::
framework
::
TensorCopy
(
*
x
,
place
,
out
);
}
...
...
@@ -62,5 +79,6 @@ class ScatterMLUKernel : public framework::OpKernel<T> {
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_MLU_KERNEL
(
scatter
,
ops
::
ScatterMLUKernel
<
float
>
,
REGISTER_OP_MLU_KERNEL
(
scatter
,
ops
::
ScatterMLUKernel
<
float
>
,
ops
::
ScatterMLUKernel
<
paddle
::
platform
::
float16
>
);
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录