sparse_backward.yaml 11.8 KB
Newer Older
1
# Backward definition for the sparse `abs` op.
# `forward` must repeat the signature of the op being differentiated (abs);
# it previously named tanh, which has its own tanh_grad entry in this file.
# Inputs are x and out_grad, the single output is x_grad; one kernel variant
# is listed for each of the sparse_coo and sparse_csr layouts.
- backward_op : abs_grad
  forward : abs(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  kernel :
    func : abs_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           abs_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

9
- backward_op : acos_grad
10 11 12 13 14 15 16
  forward : acos(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  kernel :
    func : acos_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           acos_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

17
- backward_op : acosh_grad
18 19 20 21 22 23 24
  forward : acosh(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  kernel :
    func : acosh_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           acosh_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

25
- backward_op : add_grad
26 27 28 29 30 31 32
  forward : add(Tensor x, Tensor y) -> Tensor(out)
  args : (Tensor x, Tensor y, Tensor out_grad)
  output : Tensor(x_grad), Tensor(y_grad)
  kernel :
    func : add_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo},
           add_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr}

33
- backward_op : addmm_grad
34 35 36 37 38 39 40 41 42
  forward : addmm(Tensor input, Tensor x, Tensor y, float alpha=1.0, float beta=1.0) -> Tensor(out)
  args : (Tensor input, Tensor x, Tensor y, Tensor out_grad, float alpha=1.0, float beta=1.0)
  output : Tensor(input_grad), Tensor(x_grad), Tensor(y_grad)
  kernel :
    func : addmm_csr_dense_grad {dense, sparse_csr, dense, dense -> dense, sparse_csr, dense},
           addmm_csr_csr_grad {sparse_csr, sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr, sparse_csr},
           addmm_coo_dense_grad {dense, sparse_coo, dense, dense -> dense, sparse_coo, dense},
           addmm_coo_coo_grad {sparse_coo, sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo, sparse_coo}

43
- backward_op : asin_grad
44 45 46 47 48 49 50
  forward : asin(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  kernel :
    func : asin_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           asin_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

51
- backward_op : asinh_grad
52 53 54 55 56 57 58
  forward : asinh(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  kernel :
    func : asinh_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           asinh_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

59
- backward_op : atan_grad
60 61 62 63 64 65 66
  forward : atan(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  kernel :
    func : atan_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           atan_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

67
- backward_op : atanh_grad
68 69 70 71 72 73 74
  forward : atanh(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  kernel :
    func : atanh_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           atanh_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

75
- backward_op : cast_grad
76 77 78 79 80 81 82 83
  forward : cast(Tensor x, DataType index_dtype, DataType value_dtype) -> Tensor(out)
  args : (Tensor x, Tensor out_grad, DataType value_dtype)
  output : Tensor(x_grad)
  kernel :
    func : cast_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           cast_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
    data_type : out_grad

84
- backward_op : conv3d_coo_grad
85 86
  forward : conv3d_coo (Tensor x, Tensor kernel, int[] paddings, int[] dilations, int[] strides, int groups, bool subm, str key) -> Tensor(out), Tensor(rulebook), Tensor(counter)
  args : (Tensor x, Tensor kernel, Tensor out, Tensor rulebook, Tensor counter, Tensor out_grad, int[] paddings, int[] dilations, int[] strides, int groups, bool subm, str key)
87
  output : Tensor(x_grad), Tensor(kernel_grad)
88
  kernel :
89
    func : conv3d_coo_grad{sparse_coo, dense, sparse_coo, dense, dense, sparse_coo -> sparse_coo, dense}
Z
zhangkaihuo 已提交
90

91
- backward_op : divide_grad
92 93 94 95 96 97 98
  forward : divide(Tensor x, Tensor y) -> Tensor(out)
  args : (Tensor x, Tensor y, Tensor out, Tensor out_grad)
  output : Tensor(x_grad), Tensor(y_grad)
  kernel :
    func : divide_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo},
           divide_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr}

99
- backward_op : divide_scalar_grad
100 101 102 103 104
  forward : divide_scalar (Tensor x, float scalar) -> Tensor(out)
  args : (Tensor out_grad, float scalar)
  output : Tensor(x_grad)
  invoke : divide_scalar(out_grad, scalar)

105
- backward_op : expm1_grad
106 107 108 109 110 111 112
  forward : expm1(Tensor x) -> Tensor(out)
  args : (Tensor out, Tensor out_grad)
  output : Tensor(x_grad)
  kernel :
    func : expm1_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           expm1_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

113
- backward_op : leaky_relu_grad
114 115 116 117 118 119 120
  forward : leaky_relu(Tensor x, float alpha) -> Tensor(out)
  args : (Tensor x, Tensor out_grad, float alpha)
  output : Tensor(x_grad)
  kernel :
    func : leaky_relu_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           leaky_relu_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

121
- backward_op : log1p_grad
122 123 124 125 126 127 128
  forward : log1p(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  kernel :
    func : log1p_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           log1p_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

129
- backward_op : masked_matmul_grad
130 131 132 133
  forward : masked_matmul(Tensor x, Tensor y, Tensor mask) -> Tensor(out)
  args : (Tensor x, Tensor y, Tensor out_grad)
  output : Tensor(x_grad), Tensor(y_grad)
  kernel :
134
    func : masked_matmul_csr_grad{dense, dense, sparse_csr -> dense, dense}
135

136
- backward_op : matmul_grad
137 138 139 140
  forward : matmul(Tensor x, Tensor y) -> Tensor(out)
  args : (Tensor x, Tensor y, Tensor out_grad)
  output : Tensor(x_grad), Tensor(y_grad)
  kernel :
141 142 143 144
    func : matmul_csr_dense_grad {sparse_csr, dense, dense -> sparse_csr, dense},
           matmul_csr_csr_grad {sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr},
           matmul_coo_dense_grad {sparse_coo, dense, dense -> sparse_coo, dense},
           matmul_coo_coo_grad {sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo}
145

146
- backward_op : maxpool_grad
147 148
  forward : maxpool(Tensor x, int[] kernel_sizes, int[] paddings, int[] dilations, int[] strides) -> Tensor(out), Tensor(rulebook), Tensor(counter)
  args : (Tensor x, Tensor rulebook, Tensor counter, Tensor out, Tensor out_grad, int[] kernel_sizes)
149 150
  output : Tensor(x_grad)
  kernel :
151
    func : maxpool_coo_grad {sparse_coo, dense, dense, sparse_coo, sparse_coo -> sparse_coo}
152

153
- backward_op : multiply_grad
154 155 156 157 158 159 160
  forward : multiply(Tensor x, Tensor y) -> Tensor(out)
  args : (Tensor x, Tensor y, Tensor out_grad)
  output : Tensor(x_grad), Tensor(y_grad)
  kernel :
    func : multiply_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo},
           multiply_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr}

161
- backward_op : mv_grad
162 163 164 165 166 167 168
  forward : mv(Tensor x, Tensor vec) -> Tensor(out)
  args : (Tensor x, Tensor vec, Tensor out_grad)
  output : Tensor(x_grad), Tensor(vec_grad)
  kernel :
    func : mv_coo_grad{sparse_coo, dense, dense -> sparse_coo, dense},
           mv_csr_grad{sparse_csr, dense, dense -> sparse_csr, dense}

169
- backward_op : pow_grad
170 171 172 173 174 175 176
  forward : pow(Tensor x, float factor) -> Tensor(out)
  args : (Tensor x, Tensor out_grad, float factor)
  output : Tensor(x_grad)
  kernel :
    func : pow_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           pow_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

177
- backward_op : relu6_grad
178 179 180 181 182 183 184
  forward : relu6(Tensor x, float threshold) -> Tensor(out)
  args : (Tensor out, Tensor out_grad, float threshold)
  output : Tensor(x_grad)
  kernel :
    func : relu6_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           relu6_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

185
- backward_op : relu_grad
186
  forward : relu(Tensor x) -> Tensor(out)
187
  args : (Tensor out, Tensor out_grad)
188
  output : Tensor(x_grad)
Z
zhangkaihuo 已提交
189
  kernel :
190 191 192
    func : relu_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           relu_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

193
- backward_op : scale_grad
194 195 196 197
  forward : scale(Tensor x, float scale, float bias, bool bias_after_scale) -> Tensor(out)
  args : (Tensor out_grad, float scale)
  output : Tensor(x_grad)
  invoke : scale(out_grad, scale, 0.0, true)
Z
zhangkaihuo 已提交
198

199
- backward_op : sin_grad
200
  forward : sin(Tensor x) -> Tensor(out)
Z
zhangkaihuo 已提交
201
  args : (Tensor x, Tensor out_grad)
202 203
  output : Tensor(x_grad)
  kernel :
204 205 206
    func : sin_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           sin_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

207
- backward_op : sinh_grad
208 209 210 211 212 213
  forward : sinh(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  kernel :
    func : sinh_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           sinh_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
214

215
- backward_op : softmax_grad
216 217 218 219 220 221
  forward : softmax(Tensor x, int axis=-1) -> Tensor(out)
  args : (Tensor out, Tensor out_grad, int axis)
  output : Tensor(x_grad)
  kernel :
    func : softmax_csr_grad{sparse_csr, sparse_csr -> sparse_csr}

222
- backward_op : sparse_coo_tensor_grad
223 224 225 226 227 228
  forward : sparse_coo_tensor(Tensor values, Tensor indices, IntArray dense_shape) -> Tensor(out)
  args : (Tensor indices, Tensor out_grad)
  output : Tensor(values_grad)
  kernel :
    func : sparse_coo_tensor_grad{dense, sparse_coo -> dense}

229
- backward_op : sqrt_grad
230
  forward : sqrt(Tensor x) -> Tensor(out)
231
  args : (Tensor out, Tensor out_grad)
232
  output : Tensor(x_grad)
233
  kernel :
234 235 236
    func : sqrt_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           sqrt_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

237
- backward_op : square_grad
238 239 240 241 242 243
  forward : square(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  kernel :
    func : square_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           square_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
244

245
- backward_op : subtract_grad
246 247 248 249 250 251 252
  forward : subtract(Tensor x, Tensor y) -> Tensor(out)
  args : (Tensor x, Tensor y, Tensor out_grad)
  output : Tensor(x_grad), Tensor(y_grad)
  kernel :
    func : subtract_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo},
           subtract_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr}

253
- backward_op : tan_grad
254 255 256 257 258 259 260
  forward : tan(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  kernel :
    func : tan_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           tan_csr_grad {sparse_csr, sparse_csr -> sparse_csr}

261
- backward_op : tanh_grad
262
  forward : tanh(Tensor x) -> Tensor(out)
263
  args : (Tensor out, Tensor out_grad)
264
  output : Tensor(x_grad)
265
  kernel :
266 267
    func : tanh_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
           tanh_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
268

269
- backward_op : to_dense_grad
270 271 272 273 274 275
  forward : to_dense(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  kernel :
    func : coo_to_dense_grad{sparse_coo, dense -> sparse_coo}

276
- backward_op : to_sparse_coo_grad
277 278 279 280 281 282
  forward : to_sparse_coo(Tensor x, int64_t sparse_dim) -> Tensor(out)
  args : (Tensor out_grad)
  output : Tensor(x_grad)
  kernel :
    func : coo_to_dense { sparse_coo -> dense }

283
- backward_op : values_grad
284
  forward : values_coo(Tensor x) -> Tensor(out)
285 286
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
287
  kernel :
288
    func : values_coo_grad{sparse_coo, dense-> sparse_coo}
289

290
# Backward definition paired with the fused_attention_csr forward signature.
# Takes query, key, value, the forward's softmax output and out_grad, and
# produces gradients for query, key and value only (no gradient outputs are
# declared for the mask inputs of the forward op).
# Key spacing follows the `key : value` form used by every other entry here.
- backward_op : fused_attention_grad
  forward : fused_attention_csr(Tensor query, Tensor key, Tensor value, Tensor sparse_mask, Tensor key_padding_mask, Tensor attn_mask) -> Tensor(out), Tensor(softmax)
  args : (Tensor query, Tensor key, Tensor value, Tensor softmax, Tensor out_grad)
  output : Tensor(query_grad), Tensor(key_grad), Tensor(value_grad)
  kernel :
    # Layout list order matches args: softmax is the only sparse_csr input.
    func : fused_attention_csr_grad{dense, dense, dense, sparse_csr, dense -> dense, dense, dense}
    layout : softmax
    data_type : query