# sparse_backward.yaml
# Backward (gradient) op definitions for sparse tensor kernels (COO / CSR layouts).
# abs_grad: backward of sparse abs. Takes (x, out_grad) and yields x_grad.
# One kernel per layout: COO inputs -> COO grad, CSR inputs -> CSR grad.
- backward_op : abs_grad
  forward : abs(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : abs_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           abs_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

# acos_grad: backward of sparse acos. Takes (x, out_grad) and yields x_grad,
# with COO and CSR kernel variants.
- backward_op : acos_grad
  forward : acos(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : acos_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           acos_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

# acosh_grad: backward of sparse acosh. Takes (x, out_grad) and yields x_grad,
# with COO and CSR kernel variants.
- backward_op : acosh_grad
  forward : acosh(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : acosh_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           acosh_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

# add_grad: backward of sparse add. Takes (x, y, out_grad) and yields
# x_grad and y_grad. Kernel variants cover COO+COO, CSR+CSR and COO+dense operands.
- backward_op : add_grad
  forward : add(Tensor x, Tensor y) -> Tensor(out)
  args : (Tensor x, Tensor y, Tensor out_grad)
  output : Tensor(x_grad), Tensor(y_grad)
  infer_meta :
    func : GeneralBinaryGradInferMeta
    param : [x, y]
  kernel :
    func : add_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo},
           add_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr},
           add_coo_dense_grad{sparse_coo, dense, sparse_coo -> sparse_coo, dense}

# addmm_grad: backward of sparse addmm. Yields grads for input, x and y.
# Four kernel variants: x is CSR or COO, and the remaining operands are
# either all dense or all in the same sparse layout as x.
- backward_op : addmm_grad
  forward : addmm(Tensor input, Tensor x, Tensor y, float alpha=1.0, float beta=1.0) -> Tensor(out)
  args : (Tensor input, Tensor x, Tensor y, Tensor out_grad, float alpha=1.0, float beta=1.0)
  output : Tensor(input_grad), Tensor(x_grad), Tensor(y_grad)
  infer_meta :
    func : GeneralTernaryGradInferMeta
    param : [input, x, y]
  kernel :
    func : addmm_csr_dense_grad {dense, sparse_csr, dense, dense -> dense, sparse_csr, dense},
           addmm_csr_csr_grad {sparse_csr, sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr, sparse_csr},
           addmm_coo_dense_grad {dense, sparse_coo, dense, dense -> dense, sparse_coo, dense},
           addmm_coo_coo_grad {sparse_coo, sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo, sparse_coo}

# asin_grad: backward of sparse asin. Takes (x, out_grad) and yields x_grad,
# with COO and CSR kernel variants.
- backward_op : asin_grad
  forward : asin(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : asin_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           asin_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

# asinh_grad: backward of sparse asinh. Takes (x, out_grad) and yields x_grad,
# with COO and CSR kernel variants.
- backward_op : asinh_grad
  forward : asinh(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : asinh_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           asinh_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

# atan_grad: backward of sparse atan. Takes (x, out_grad) and yields x_grad,
# with COO and CSR kernel variants.
- backward_op : atan_grad
  forward : atan(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : atan_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           atan_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

# atanh_grad: backward of sparse atanh. Takes (x, out_grad) and yields x_grad,
# with COO and CSR kernel variants.
- backward_op : atanh_grad
  forward : atanh(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : atanh_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           atanh_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

# batch_norm_grad: backward of sparse batch_norm. Yields grads for x, scale and bias.
# Only a COO kernel is registered: x and out_grad are sparse_coo, every other
# tensor argument is dense. mean_out, variance_out and reserve_space are optional.
- backward_op : batch_norm_grad
  forward : batch_norm (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
  args : (Tensor x, Tensor scale, Tensor bias, Tensor mean_out, Tensor variance_out, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
  output : Tensor(x_grad), Tensor(scale_grad), Tensor(bias_grad)
  infer_meta :
    func : GeneralTernaryGradInferMeta
    param : [x, scale, bias]
  kernel :
    func : batch_norm_coo_grad {sparse_coo, dense, dense, dense, dense, dense, dense, dense, sparse_coo -> sparse_coo, dense, dense}
    data_type : out_grad
  optional : mean_out, variance_out, reserve_space

# cast_grad: backward of sparse cast. Takes (x, out_grad, value_dtype) and yields
# x_grad; the forward's index_dtype is not passed to the backward.
# COO and CSR kernel variants; kernel data_type is keyed on out_grad.
- backward_op : cast_grad
  forward : cast(Tensor x, DataType index_dtype, DataType value_dtype) -> Tensor(out)
  args : (Tensor x, Tensor out_grad, DataType value_dtype)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : cast_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           cast_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
    data_type : out_grad

# conv3d_grad: backward of sparse conv3d. Consumes the forward's out, rulebook
# and counter outputs together with out_grad, and yields x_grad and kernel_grad.
# COO only: x, out and out_grad are sparse_coo; kernel, rulebook and counter are dense.
- backward_op : conv3d_grad
  forward : conv3d (Tensor x, Tensor kernel, int[] paddings, int[] dilations, int[] strides, int groups, bool subm, str key) -> Tensor(out), Tensor(rulebook), Tensor(counter)
  args : (Tensor x, Tensor kernel, Tensor out, Tensor rulebook, Tensor counter, Tensor out_grad, int[] paddings, int[] dilations, int[] strides, int groups, bool subm, str key)
  output : Tensor(x_grad), Tensor(kernel_grad)
  infer_meta :
    func : GeneralBinaryGradInferMeta
    param : [x, kernel]
  kernel :
    func : conv3d_coo_grad{sparse_coo, dense, sparse_coo, dense, dense, sparse_coo -> sparse_coo, dense}

# divide_grad: backward of sparse divide. Takes (x, y, out, out_grad) and yields
# x_grad and y_grad, with COO+COO and CSR+CSR kernel variants.
- backward_op : divide_grad
  forward : divide(Tensor x, Tensor y) -> Tensor(out)
  args : (Tensor x, Tensor y, Tensor out, Tensor out_grad)
  output : Tensor(x_grad), Tensor(y_grad)
  infer_meta :
    func : GeneralBinaryGradInferMeta
    param : [x, y]
  kernel :
    func : divide_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo},
           divide_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr}

# divide_scalar_grad: backward of sparse divide_scalar. No dedicated kernel:
# x_grad is obtained by invoking the forward divide_scalar on out_grad with the same scalar.
- backward_op : divide_scalar_grad
  forward : divide_scalar (Tensor x, float scalar) -> Tensor(out)
  args : (Tensor out_grad, float scalar)
  output : Tensor(x_grad)
  invoke : divide_scalar(out_grad, scalar)

# expm1_grad: backward of sparse expm1. Uses the forward output: takes
# (out, out_grad) and yields x_grad, with COO and CSR kernel variants.
- backward_op : expm1_grad
  forward : expm1(Tensor x) -> Tensor(out)
  args : (Tensor out, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [out]
  kernel :
    func : expm1_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           expm1_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

# leaky_relu_grad: backward of sparse leaky_relu. Takes (x, out_grad, alpha) and
# yields x_grad, with COO and CSR kernel variants.
- backward_op : leaky_relu_grad
  forward : leaky_relu(Tensor x, float alpha) -> Tensor(out)
  args : (Tensor x, Tensor out_grad, float alpha)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : leaky_relu_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           leaky_relu_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

# log1p_grad: backward of sparse log1p. Takes (x, out_grad) and yields x_grad,
# with COO and CSR kernel variants.
- backward_op : log1p_grad
  forward : log1p(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : log1p_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           log1p_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

# masked_matmul_grad: backward of masked_matmul. Takes dense x and y plus a CSR
# out_grad and yields dense x_grad and y_grad; the forward's mask is not an input here.
- backward_op : masked_matmul_grad
  forward : masked_matmul(Tensor x, Tensor y, Tensor mask) -> Tensor(out)
  args : (Tensor x, Tensor y, Tensor out_grad)
  output : Tensor(x_grad), Tensor(y_grad)
  infer_meta :
    func : GeneralBinaryGradInferMeta
    param : [x, y]
  kernel :
    func : masked_matmul_csr_grad{dense, dense, sparse_csr -> dense, dense}

# matmul_grad: backward of sparse matmul. Takes (x, y, out_grad) and yields
# x_grad and y_grad. Four kernel variants: x is CSR or COO, and y/out_grad are
# either both dense or both in the same sparse layout as x.
- backward_op : matmul_grad
  forward : matmul(Tensor x, Tensor y) -> Tensor(out)
  args : (Tensor x, Tensor y, Tensor out_grad)
  output : Tensor(x_grad), Tensor(y_grad)
  infer_meta :
    func : GeneralBinaryGradInferMeta
    param : [x, y]
  kernel :
    func : matmul_csr_dense_grad {sparse_csr, dense, dense -> sparse_csr, dense},
           matmul_csr_csr_grad {sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr},
           matmul_coo_dense_grad {sparse_coo, dense, dense -> sparse_coo, dense},
           matmul_coo_coo_grad {sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo}

# maxpool_grad: backward of sparse maxpool. Consumes the forward's rulebook,
# counter and out outputs together with out_grad and kernel_sizes, and yields x_grad.
# COO only: x, out and out_grad are sparse_coo; rulebook and counter are dense.
- backward_op : maxpool_grad
  forward : maxpool(Tensor x, int[] kernel_sizes, int[] paddings, int[] dilations, int[] strides) -> Tensor(out), Tensor(rulebook), Tensor(counter)
  args : (Tensor x, Tensor rulebook, Tensor counter, Tensor out, Tensor out_grad, int[] kernel_sizes)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : maxpool_coo_grad {sparse_coo, dense, dense, sparse_coo, sparse_coo -> sparse_coo}

# multiply_grad: backward of sparse multiply. Takes (x, y, out_grad) and yields
# x_grad and y_grad, with COO+COO and CSR+CSR kernel variants.
- backward_op : multiply_grad
  forward : multiply(Tensor x, Tensor y) -> Tensor(out)
  args : (Tensor x, Tensor y, Tensor out_grad)
  output : Tensor(x_grad), Tensor(y_grad)
  infer_meta :
    func : GeneralBinaryGradInferMeta
    param : [x, y]
  kernel :
    func : multiply_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo},
           multiply_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr}

# mv_grad: backward of sparse mv. Takes (x, vec, out_grad) and yields x_grad and
# vec_grad. x is COO or CSR; vec and out_grad are dense.
- backward_op : mv_grad
  forward : mv(Tensor x, Tensor vec) -> Tensor(out)
  args : (Tensor x, Tensor vec, Tensor out_grad)
  output : Tensor(x_grad), Tensor(vec_grad)
  infer_meta :
    func : GeneralBinaryGradInferMeta
    param : [x, vec]
  kernel :
    func : mv_coo_grad{sparse_coo, dense, dense -> sparse_coo, dense},
           mv_csr_grad{sparse_csr, dense, dense -> sparse_csr, dense}

# pow_grad: backward of sparse pow. Takes (x, out_grad, factor) and yields x_grad,
# with COO and CSR kernel variants.
- backward_op : pow_grad
  forward : pow(Tensor x, float factor) -> Tensor(out)
  args : (Tensor x, Tensor out_grad, float factor)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : pow_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           pow_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

# relu6_grad: backward of sparse relu6. Uses the forward output: takes
# (out, out_grad, threshold) and yields x_grad, with COO and CSR kernel variants.
- backward_op : relu6_grad
  forward : relu6(Tensor x, float threshold) -> Tensor(out)
  args : (Tensor out, Tensor out_grad, float threshold)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [out]
  kernel :
    func : relu6_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           relu6_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

# relu_grad: backward of sparse relu. Uses the forward output: takes
# (out, out_grad) and yields x_grad, with COO and CSR kernel variants.
- backward_op : relu_grad
  forward : relu(Tensor x) -> Tensor(out)
  args : (Tensor out, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [out]
  kernel :
    func : relu_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           relu_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

# reshape_grad: backward of sparse reshape. Takes (x, out_grad) and yields x_grad,
# with COO and CSR kernel variants; the forward's shape argument is not passed.
- backward_op : reshape_grad
  forward : reshape(Tensor x, IntArray shape) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : reshape_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           reshape_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

# scale_grad: backward of sparse scale. No dedicated kernel: x_grad is obtained
# by invoking the forward scale on out_grad with the same scale factor,
# bias fixed to 0.0 and bias_after_scale fixed to true.
- backward_op : scale_grad
  forward : scale(Tensor x, float scale, float bias, bool bias_after_scale) -> Tensor(out)
  args : (Tensor out_grad, float scale)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [out_grad]
  invoke : scale(out_grad, scale, 0.0, true)

# sin_grad: backward of sparse sin. Takes (x, out_grad) and yields x_grad,
# with COO and CSR kernel variants.
- backward_op : sin_grad
  forward : sin(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : sin_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           sin_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

# sinh_grad: backward of sparse sinh. Takes (x, out_grad) and yields x_grad,
# with COO and CSR kernel variants.
- backward_op : sinh_grad
  forward : sinh(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : sinh_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           sinh_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

# softmax_grad: backward of sparse softmax. Uses the forward output: takes
# (out, out_grad, axis) and yields x_grad. Only a CSR kernel is registered.
- backward_op : softmax_grad
  forward : softmax(Tensor x, int axis=-1) -> Tensor(out)
  args : (Tensor out, Tensor out_grad, int axis)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [out]
  kernel :
    func : softmax_csr_grad{sparse_csr, sparse_csr -> sparse_csr}

# sparse_coo_tensor_grad: backward of sparse_coo_tensor construction. Takes the
# dense indices and a COO out_grad and yields a dense values_grad.
- backward_op : sparse_coo_tensor_grad
  forward : sparse_coo_tensor(Tensor values, Tensor indices, IntArray dense_shape) -> Tensor(out)
  args : (Tensor indices, Tensor out_grad)
  output : Tensor(values_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [out_grad]
  kernel :
    func : sparse_coo_tensor_grad{dense, sparse_coo -> dense}

# sqrt_grad: backward of sparse sqrt. Uses the forward output: takes
# (out, out_grad) and yields x_grad, with COO and CSR kernel variants.
- backward_op : sqrt_grad
  forward : sqrt(Tensor x) -> Tensor(out)
  args : (Tensor out, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [out]
  kernel :
    func : sqrt_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           sqrt_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

# square_grad: backward of sparse square. Takes (x, out_grad) and yields x_grad,
# with COO and CSR kernel variants.
- backward_op : square_grad
  forward : square(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : square_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           square_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

# subtract_grad: backward of sparse subtract. Takes (x, y, out_grad) and yields
# x_grad and y_grad, with COO+COO and CSR+CSR kernel variants.
- backward_op : subtract_grad
  forward : subtract(Tensor x, Tensor y) -> Tensor(out)
  args : (Tensor x, Tensor y, Tensor out_grad)
  output : Tensor(x_grad), Tensor(y_grad)
  infer_meta :
    func : GeneralBinaryGradInferMeta
    param : [x, y]
  kernel :
    func : subtract_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo},
           subtract_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr}

# sync_batch_norm_grad: backward of sparse sync_batch_norm. Yields grads for x,
# scale and bias. Only a COO kernel is registered: x and out_grad are sparse_coo,
# every other tensor argument is dense. reserve_space is optional.
- backward_op : sync_batch_norm_grad
  forward : sync_batch_norm(Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
  args : (Tensor x, Tensor scale, Tensor bias, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
  output : Tensor(x_grad), Tensor(scale_grad), Tensor(bias_grad)
  infer_meta :
    func : GeneralTernaryGradInferMeta
    param : [x, scale, bias]
  kernel :
    func : sync_batch_norm_coo_grad{sparse_coo, dense, dense, dense, dense, dense, sparse_coo -> sparse_coo, dense, dense}
    data_type : out_grad
  optional : reserve_space

# tan_grad: backward of sparse tan. Takes (x, out_grad) and yields x_grad,
# with COO and CSR kernel variants.
- backward_op : tan_grad
  forward : tan(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : tan_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           tan_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

# tanh_grad: backward of sparse tanh. Uses the forward output: takes
# (out, out_grad) and yields x_grad, with COO and CSR kernel variants.
- backward_op : tanh_grad
  forward : tanh(Tensor x) -> Tensor(out)
  args : (Tensor out, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [out]
  kernel :
    func : tanh_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           tanh_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

# to_dense_grad: backward of to_dense. Takes a COO x and a dense out_grad and
# yields a COO x_grad. Only the COO kernel is registered here.
- backward_op : to_dense_grad
  forward : to_dense(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : coo_to_dense_grad{sparse_coo, dense -> sparse_coo}

# to_sparse_coo_grad: backward of to_sparse_coo. Takes a COO out_grad and yields a
# dense x_grad by reusing the coo_to_dense kernel (no dedicated grad kernel).
- backward_op : to_sparse_coo_grad
  forward : to_sparse_coo(Tensor x, int64_t sparse_dim) -> Tensor(out)
  args : (Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
  kernel :
    func : coo_to_dense { sparse_coo -> dense }

# transpose_grad: backward of sparse transpose. Takes (out_grad, perm) and yields
# x_grad, with COO and CSR kernel variants.
- backward_op : transpose_grad
  forward : transpose(Tensor x, int[] perm) -> Tensor(out)
  args : (Tensor out_grad, int[] perm)
  output : Tensor(x_grad)
  infer_meta :
    func : TransposeGradInferMeta
    param : [out_grad, perm]
  kernel :
    func : transpose_coo_grad {sparse_coo -> sparse_coo},
           transpose_csr_grad {sparse_csr -> sparse_csr}

# values_grad: backward of extracting the values of a sparse tensor. Takes a COO x
# and a dense out_grad and yields a COO x_grad. Only the COO kernel is registered here.
# NOTE(review): the forward is named `values_coo` here; confirm this matches the
# forward op name declared in the sparse forward op definitions.
- backward_op : values_grad
  forward : values_coo(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : values_coo_grad{sparse_coo, dense-> sparse_coo}

# fused_attention_grad: backward of fused_attention_csr. Takes dense query, key,
# value and out_grad plus the CSR softmax saved by the forward, and yields dense
# grads for query, key and value. Kernel layout is keyed on softmax and
# data_type on query.
- backward_op : fused_attention_grad
  forward : fused_attention_csr(Tensor query, Tensor key, Tensor value, Tensor sparse_mask, Tensor key_padding_mask, Tensor attn_mask) -> Tensor(out), Tensor(softmax)
  args : (Tensor query, Tensor key, Tensor value, Tensor softmax, Tensor out_grad)
  output : Tensor(query_grad), Tensor(key_grad), Tensor(value_grad)
  infer_meta :
    func : sparse::FusedAttentionGradInferMeta
  kernel :
    func : fused_attention_csr_grad{dense, dense, dense, sparse_csr, dense -> dense, dense, dense}
    layout : softmax
    data_type : query