// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <map>
#include <set>
#include <string>
#include <utility>

// NOTE(zhiqiu): Commonly, the inputs of an auto-generated OP function are
// determined automatically by the OP's proto, i.e., all the inputs
// registered in OpMaker.
// However, some OPs have dispensable inputs, meaning the input can be none
// under some conditions. Most dispensable inputs are not used in imperative
// mode, so we drop them when generating OP functions. For the very few OPs
// whose dispensable inputs are used, we specify them manually in this map
// (a lookup sketch follows the map).
std::map<std::string, std::set<std::string>> op_ins_map = {
    {"fc", {"Input", "W", "Bias"}},
    {"layer_norm", {"X", "Scale", "Bias"}},
    {"conv2d_fusion_cutlass", {"Input", "Filter", "Bias", "ResidualData"}},
    {"conv2d_fusion", {"Input", "Filter", "Bias", "ResidualData"}},
    {"bincount", {"X", "Weights"}},
    {"fused_attention",
     {"X",
      "LnScale",
      "LnBias",
      "QKVW",
      "QKVBias",
      "CacheKV",
      "SrcMask",
      "OutLinearW",
      "OutLinearBias",
      "Ln2Scale",
      "Ln2Bias"}},
    {"fused_gate_attention",
     {"Query",
      "Key",
      "QueryWeight",
      "KeyWeight",
      "ValueWeight",
      "QKVWeight",
      "NonbatchedBias",
      "SrcMask",
      "GateWeight",
      "GateBias",
      "OutLinearWeight",
      "OutLinearBias"}},
    {"fused_multi_transformer",
     {"X",
      "LnScale",
      "LnBias",
      "QKVW",
      "QKVBias",
      "CacheKV",
      "PreCaches",
      "RotaryPosEmb",
      "TimeStep",
      "SeqLengths",
      "SrcMask",
      "OutLinearW",
      "OutLinearBias",
      "FFNLnScale",
      "FFNLnBias",
      "FFN1Weight",
      "FFN1Bias",
      "FFN2Weight",
      "FFN2Bias"}},
    {"fused_multi_transformer_int8",
     {"X",           "LnScale",           "LnBias",       "QKVW",
      "QKVBias",     "CacheKV",           "TimeStep",     "SrcMask",
      "OutLinearW",  "OutLinearBias",     "FFNLnScale",   "FFNLnBias",
      "FFN1Weight",  "FFN1Bias",          "FFN2Weight",   "FFN2Bias",
      "QKVOutScale", "OutLinearOutScale", "FFN1OutScale", "FFN2OutScale"}},
    {"fused_bias_dropout_residual_layer_norm",
     {"X", "Residual", "Bias", "LnScale", "LnBias"}},
    {"instance_norm", {"X", "Scale", "Bias"}},
    {"gru_unit", {"Input", "HiddenPrev", "Weight", "Bias"}},
    {"label_smooth", {"X", "PriorDist"}},
    {"assign", {"X"}},
    {"crop", {"X", "Y", "Offsets"}},
    {"crop_tensor", {"X", "Shape", "Offsets"}},
    {"reshape2", {"X", "Shape"}},
    {"expand", {"X", "ExpandTimes"}},
    {"slice",
     {"Input",
      "StartsTensor",
      "EndsTensor",
      "StartsTensorList",
      "EndsTensorList"}},
    {"strided_slice",
     {"Input",
      "StartsTensor",
      "EndsTensor",
      "StridesTensor",
      "StartsTensorList",
      "EndsTensorList",
      "StridesTensorList"}},
    {"set_value",
     {"Input",
      "ValueTensor",
      "StartsTensorList",
      "EndsTensorList",
      "StepsTensorList"}},
    {"fake_quantize_dequantize_moving_average_abs_max",
     {"X", "InScale", "InAccum", "InState"}},
    {"nll_loss", {"X", "Label", "Weight"}},
    {"smooth_l1_loss", {"X", "Y", "InsideWeight", "OutsideWeight"}},
    {"bilinear_tensor_product", {"X", "Y", "Weight", "Bias"}},
    {"gather", {"X", "Index", "Axis"}},
    {"repeat_interleave", {"X", "RepeatsTensor"}},
    {"roi_pool", {"X", "ROIs", "RoisNum"}},
    {"roi_align", {"X", "ROIs", "RoisNum"}},
    {"prroi_pool", {"X", "ROIs", "BatchRoINums"}},
    {"psroi_pool", {"X", "ROIs", "RoisNum"}},
    {"collect_fpn_proposals",
     {"MultiLevelRois", "MultiLevelScores", "MultiLevelRoIsNum"}},
    {"distribute_fpn_proposals", {"FpnRois", "RoisNum"}},
    {"warpctc", {"Logits", "Label", "LogitsLength", "LabelLength"}},
    {"hierarchical_sigmoid",
     {"X", "W", "Label", "PathTable", "PathCode", "Bias"}},
    {"moving_average_abs_max_scale", {"X", "InAccum", "InState"}},
    {"multiclass_nms3", {"BBoxes", "Scores", "RoisNum"}},
    {"box_coder", {"PriorBox", "PriorBoxVar", "TargetBox"}},
    {"momentum", {"Param", "Grad", "Velocity", "LearningRate", "MasterParam"}},
    {"merged_momentum",
     {"Param", "Grad", "Velocity", "LearningRate", "MasterParam"}},
    {"sparse_momentum",
     {"Param", "Grad", "Velocity", "Index", "LearningRate", "MasterParam"}},
    {"rnn", {"Input", "PreState", "WeightList", "SequenceLength"}},
    {"run_program", {"X", "Params"}},
    {"fused_feedforward",
     {"Dropout1Seed",
      "Dropout2Seed",
      "Linear1Bias",
      "Linear2Bias",
      "Ln1Scale",
      "Ln1Bias",
      "Ln2Scale",
      "Ln2Bias"}},
    {"faster_tokenizer", {"Text", "Vocab", "TextPair"}},
    {"matrix_rank", {"X", "TolTensor"}},
    {"rmsprop",
     {"Param",
      "MeanSquare",
      "Grad",
      "Moment",
      "LearningRate",
      "MeanGrad",
      "MasterParam"}},
    {"adam",
     {"Param",
      "Grad",
      "LearningRate",
      "Moment1",
      "Moment2",
      "Beta1Pow",
      "Beta2Pow",
      "MasterParam"}},
    {"merged_adam",
     {"Param",
      "Grad",
      "LearningRate",
      "Moment1",
      "Moment2",
      "Beta1Pow",
      "Beta2Pow",
      "MasterParam"}},
    {"fused_adam",
     {"Params",
      "Grads",
      "LearningRate",
      "Moments1",
      "Moments2",
      "Beta1Pows",
      "Beta2Pows",
      "MasterParams",
      "SkipUpdate"}},
    {"adamw",
     {"Param",
      "Grad",
      "LearningRate",
      "Moment1",
      "Moment2",
      "Beta1Pow",
      "Beta2Pow",
      "MasterParam"}},
    {"adamax",
     {"Param",
      "Grad",
      "LearningRate",
      "Moment",
      "InfNorm",
      "Beta1Pow",
      "MasterParam"}},
    {"lamb",
     {"Param",
      "Grad",
      "LearningRate",
      "Moment1",
      "Moment2",
      "Beta1Pow",
      "Beta2Pow",
      "MasterParam"}},
    {"sparse_attention",
     {"Q", "K", "V", "Offset", "Columns", "KeyPaddingMask", "AttnMask"}},
    {"sgd", {"Param", "LearningRate", "Grad", "MasterParam"}},
    {"adagrad", {"Param", "Grad", "Moment", "LearningRate", "MasterParam"}},
    {"adadelta",
     {"Param", "Grad", "AvgSquaredGrad", "AvgSquaredUpdate", "MasterParam"}},
    {"graph_khop_sampler", {"Row", "Eids", "Col_Ptr", "X"}},
    {"nce",
     {"Input",
      "Label",
      "Weight",
      "Bias",
      "SampleWeight",
      "CustomDistProbs",
      "CustomDistAlias",
      "CustomDistAliasProbs"}},
    {"yolov3_loss", {"X", "GTBox", "GTLabel", "GTScore"}},
    {"check_finite_and_unscale", {"X", "Scale", "FloatStatus"}},
    {"group_norm", {"X", "Scale", "Bias"}},
    {"linear_chain_crf", {"Emission", "Transition", "Label", "Length"}},
    {"crf_decoding", {"Emission", "Transition", "Label", "Length"}},
    {"chunk_eval", {"Inference", "Label", "SeqLength"}},
    {"sequence_mask", {"X", "MaxLenTensor"}},
    {"graph_reindex",
     {"X", "Neighbors", "Count", "HashTable_Value", "HashTable_Index"}},
    {"graph_sample_neighbors", {"Row", "Col_Ptr", "X", "Eids", "Perm_Buffer"}},
    {"crop", {"X", "Y", "Offsets"}},
    {"batch_norm",
     {"X", "Scale", "Bias", "Mean", "Variance", "MomentumTensor"}},
    {"inplace_abn",
     {"X", "Scale", "Bias", "Mean", "Variance", "MomentumTensor"}},
    {"linear_interp", {"X", "OutSize"}},
    {"bilinear_interp", {"X", "OutSize"}},
    {"trilinear_interp", {"X", "OutSize"}},
    {"nearest_interp", {"X", "OutSize"}},
    {"bicubic_interp", {"X", "OutSize"}},
    {"resnet_basic_block",
     {"X",
      "Filter1",
      "Scale1",
      "Bias1",
      "Mean1",
      "Var1",
      "Filter2",
      "Scale2",
      "Bias2",
      "Mean2",
      "Var2",
      "Filter3",
      "Scale3",
      "Bias3",
      "Mean3",
      "Var3"}},
    {"graph_send_recv", {"X", "Src_index", "Dst_index", "Out_size"}},
    {"graph_send_ue_recv", {"X", "Y", "Src_index", "Dst_index", "Out_size"}},
};
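
// A minimal lookup sketch (hypothetical helper, not part of the generator
// itself): a dispensable input survives code generation only if its OP
// appears in op_ins_map and lists that input.
inline bool KeepDispensableInput(const std::string& op_type,
                                 const std::string& input_name) {
  auto iter = op_ins_map.find(op_type);
  return iter != op_ins_map.end() && iter->second.count(input_name) > 0;
}
// Usage: KeepDispensableInput("layer_norm", "Bias") is true, so the
// generated layer_norm function keeps its dispensable Bias input.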

// NOTE(zhiqiu): Like op_ins_map.
// Commonly, the outputs of an auto-generated OP function are determined
// automatically by the OP's proto, i.e., all the outputs registered in
// OpMaker.
// However, some OPs have dispensable outputs, meaning the output can be
// none under some conditions. Most dispensable outputs are not used in
// imperative mode, so we drop them when generating OP functions. For the
// very few OPs whose dispensable outputs are used, we specify them manually
// in this map (a lookup sketch follows the map).
std::map<std::string, std::set<std::string>> op_outs_map = {
    {"fake_quantize_dequantize_moving_average_abs_max",
     {"Out", "OutScale", "OutAccum", "OutState"}},
    {"batch_norm",
     {"Y",
      "MeanOut",
      "VarianceOut",
      "SavedMean",
      "SavedVariance",
      "ReserveSpace"}},
    {"lstsq", {"Solution", "Residuals", "Rank", "SingularValues"}},
    {"inplace_abn",
     {"Y",
      "MeanOut",
      "VarianceOut",
      "SavedMean",
      "SavedVariance",
      "ReserveSpace"}},
    {"fused_attention", {"LnMean",         "LnVariance",
                         "LnOut",          "QKVOut",
                         "QKVBiasOut",     "TransposeOut2",
                         "QKOut",          "QKTVOut",
                         "SoftmaxOut",     "AttnDropoutMaskOut",
                         "AttnDropoutOut", "SrcMaskOut",
                         "FMHAOut",        "OutLinearOut",
                         "DropoutMaskOut", "Ln2Mean",
                         "Ln2Variance",    "BiasDropoutResidualOut",
                         "CacheKVOut",     "Y"}},
    {"fused_bias_dropout_residual_layer_norm",
     {"BiasDropoutResidualOut", "DropoutMaskOut", "LnMean", "LnVariance", "Y"}},
    {"fused_gate_attention",
     {"QueryTransposeOut",
      "KeyTransposeOut",
      "ValueTransposeOut",
      "QKVTransposeOut",
      "SoftmaxOut",
      "FMHAOut",
      "GateOut",
      "Out"}},
    {"sync_batch_norm",
     {"Y",
      "MeanOut",
      "VarianceOut",
      "SavedMean",
      "SavedVariance",
      "ReserveSpace"}},
    {"adadelta",
     {"ParamOut",
      "AvgSquaredGradOut",
      "AvgSquaredUpdateOut",
      "MasterParamOut"}},
    {"unique", {"Out", "Index", "Indices", "Counts"}},
    {"unique_consecutive", {"Out", "Index", "Counts"}},
    {"generate_proposals", {"RpnRois", "RpnRoiProbs", "RpnRoisNum"}},
    {"collect_fpn_proposals", {"FpnRois", "RoisNum"}},
    {"matrix_nms", {"Out", "Index", "RoisNum"}},
    {"distribute_fpn_proposals",
     {"MultiFpnRois", "RestoreIndex", "MultiLevelRoIsNum"}},
    {"moving_average_abs_max_scale",
     {"Out", "OutScale", "OutAccum", "OutState"}},
    {"rmsprop",
     {"ParamOut",
      "MomentOut",
      "MeanSquareOut",
      "MeanGradOut",
      "MasterParamOut"}},
    {"multiclass_nms3", {"Out", "NmsRoisNum"}},
    {"generate_proposals_v2", {"RpnRois", "RpnRoiProbs", "RpnRoisNum"}},
    {"momentum", {"ParamOut", "VelocityOut", "MasterParamOut"}},
    {"merged_momentum", {"ParamOut", "VelocityOut", "MasterParamOut"}},
    {"sparse_momentum", {"ParamOut", "VelocityOut", "MasterParamOut"}},
    {"rnn", {"DropoutState", "Reserve", "Out", "State"}},
    {"run_program", {"DOut", "CUDAGraph"}},
    {"adam",
     {"ParamOut",
      "Moment1Out",
      "Moment2Out",
      "Beta1PowOut",
      "Beta2PowOut",
      "MasterParamOut"}},
    {"merged_adam",
     {"ParamOut",
      "Moment1Out",
      "Moment2Out",
      "Beta1PowOut",
      "Beta2PowOut",
      "MasterParamOut"}},
    {"fused_adam",
     {"ParamsOut",
      "Moments1Out",
      "Moments2Out",
      "Beta1PowsOut",
      "Beta2PowsOut",
      "MasterParamsOut"}},
    {"adamw",
     {"ParamOut",
      "Moment1Out",
      "Moment2Out",
      "Beta1PowOut",
      "Beta2PowOut",
      "MasterParamOut"}},
    {"adamax",
     {"ParamOut", "MomentOut", "InfNormOut", "Beta1Pow", "MasterParamOut"}},
    {"sgd", {"ParamOut", "MasterParamOut"}},
    {"adagrad", {"ParamOut", "MomentOut", "MasterParamOut"}},
    {"lamb",
     {"ParamOut",
      "Moment1Out",
      "Moment2Out",
      "Beta1PowOut",
      "Beta2PowOut",
      "MasterParamOut"}},
    {"fused_multi_transformer", {"CacheKVOut", "Out"}},
    {"fused_multi_transformer_int8", {"CacheKVOut", "Out"}},
    {"resnet_basic_block",
     {"Y",         "Conv1",     "SavedMean1", "SavedInvstd1", "Mean1Out",
      "Var1Out",   "Conv2",     "SavedMean2", "SavedInvstd2", "Mean2Out",
      "Var2Out",   "Conv3",     "SavedMean3", "SavedInvstd3", "Mean3Out",
      "Var3Out",   "MaxInput1", "MaxFilter1", "MaxInput2",    "MaxFilter2",
      "MaxInput3", "MaxFilter3"}},
};
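
// A minimal lookup sketch (hypothetical, for illustration only): it yields
// the full output set an OP overrides, or an empty set when the OP keeps
// the default behavior of dropping its dispensable outputs.
inline const std::set<std::string>& OverriddenOutputs(
    const std::string& op_type) {
  static const std::set<std::string> kEmpty;
  auto iter = op_outs_map.find(op_type);
  return iter == op_outs_map.end() ? kEmpty : iter->second;
}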

// NOTE(zhiqiu): Commonly, the outputs of an auto-generated OP function are
// created in C++ automatically.
// However, some OPs need their outputs to be passed in from Python instead
// of being created in C++. There are mainly two reasons for that:
// (1) Optimizer OPs need to update the input param in place, like sgd.
//     So they need to be passed the output, which is the same as the input
//     param.
// (2) Very few Python APIs have `out` in their arguments, like
//     fill_constant. So they need to pass the Python output to C++.
//     Actually, this is not a good design, since it may break the SSA
//     graph, especially in declarative mode.
// For those OPs, we manually specify the outputs that need to be passed in
// this map (a lookup sketch follows the map).
std::map<std::string, std::set<std::string>> op_passing_outs_map = {
    {"sgd", {"ParamOut", "MasterParamOut"}},
    {"rmsprop",
     {"ParamOut",
      "MomentOut",
      "MeanSquareOut",
      "MeanGradOut",
      "MasterParamOut"}},
    {"ftrl", {"ParamOut", "SquaredAccumOut", "LinearAccumOut"}},
    {"adadelta",
     {"ParamOut",
      "AvgSquaredGradOut",
      "AvgSquaredUpdateOut",
      "MasterParamOut"}},
    {"adagrad", {"ParamOut", "MomentOut", "MasterParamOut"}},
    {"adamax", {"ParamOut", "MomentOut", "InfNormOut", "MasterParamOut"}},
    {"dpsgd", {"ParamOut"}},
    {"decayed_adagrad", {"ParamOut", "MomentOut"}},
    {"lars_momentum", {"ParamOut", "VelocityOut"}},
    {"coalesce_tensor", {"Output", "FusedOutput"}},
    {"adam",
     {"ParamOut",
      "Moment1Out",
      "Moment2Out",
      "Beta1PowOut",
      "Beta2PowOut",
      "MasterParamOut"}},
    {"merged_adam",
     {"ParamOut",
      "Moment1Out",
      "Moment2Out",
      "Beta1PowOut",
      "Beta2PowOut",
      "MasterParamOut"}},
    {"fused_adam",
     {"ParamsOut",
      "Moments1Out",
      "Moments2Out",
      "Beta1PowsOut",
      "Beta2PowsOut",
      "MasterParamsOut"}},
    {"adamw",
     {"ParamOut",
      "Moment1Out",
      "Moment2Out",
      "Beta1PowOut",
      "Beta2PowOut",
      "MasterParamOut"}},
    {"lamb",
     {"ParamOut",
      "Moment1Out",
      "Moment2Out",
      "Beta1PowOut",
      "Beta2PowOut",
      "MasterParamOut"}},
    {"average_accumulates",
     {"out_sum_1",
      "out_sum_2",
      "out_sum_3",
      "out_num_accumulates",
      "out_old_num_accumulates",
      "out_num_updates"}},
    {"momentum", {"ParamOut", "VelocityOut", "MasterParamOut"}},
    {"merged_momentum", {"ParamOut", "VelocityOut", "MasterParamOut"}},
    {"sparse_momentum", {"ParamOut", "VelocityOut", "MasterParamOut"}},
    {"batch_norm", {"MeanOut", "VarianceOut"}},
    {"inplace_abn", {"MeanOut", "VarianceOut"}},
    {"sync_batch_norm", {"MeanOut", "VarianceOut"}},
    {"accuracy", {"Correct", "Total"}},
    {"fill_constant", {"Out"}},
    {"recv_v2", {"Out"}},
    {"partial_recv", {"Out"}},
    {"matmul", {"Out"}},
    {"c_broadcast", {"Out"}},
    {"c_sync_calc_stream", {"Out"}},
    {"c_sync_comm_stream", {"Out"}},
    {"c_reduce_sum", {"Out"}},
    {"c_reduce_max", {"Out"}},
    {"c_reduce_min", {"Out"}},
    {"c_reduce_prod", {"Out"}},
    {"c_reduce", {"Out"}},
    {"c_scatter", {"Out"}},
    {"barrier", {"Out"}},
    {"fake_quantize_dequantize_moving_average_abs_max",
     {"Out", "OutScale", "OutAccum", "OutState"}},
    {"fake_quantize_dequantize_abs_max", {"Out", "OutScale"}},
    {"fake_channel_wise_quantize_dequantize_abs_max", {"Out", "OutScale"}},
    {"check_finite_and_unscale", {"Out", "FoundInfinite"}},
    {"update_loss_scaling",
     {"Out", "LossScaling", "OutGoodSteps", "OutBadSteps"}},
    {"moving_average_abs_max_scale",
     {"Out", "OutScale", "OutAccum", "OutState"}},
    {"rnn", {"DropoutState"}},
    {"run_program", {"Out", "DOut", "OutScope", "CUDAGraph"}},
    {"clear_float_status", {"FloatStatusOut"}},
    {"get_float_status", {"FloatStatusOut"}},
    {"assign", {"Out"}},
    {"assign_value", {"Out"}},
    {"split", {"Out"}},
    {"concat", {"Out"}},
    {"fused_multi_transformer", {"CacheKVOut"}},
    {"fused_multi_transformer_int8", {"CacheKVOut"}},
    {"group_norm", {"Mean", "Variance"}},
    {"resnet_basic_block",
     {"Mean1Out", "Var1Out", "Mean2Out", "Var2Out", "Mean3Out", "Var3Out"}},
};
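
// A minimal lookup sketch (hypothetical helper) of the decision this map
// encodes: for a listed output, the generated function accepts a variable
// passed in from Python instead of constructing a fresh one in C++.
inline bool IsOutputPassedFromPython(const std::string& op_type,
                                     const std::string& output_name) {
  auto iter = op_passing_outs_map.find(op_type);
  return iter != op_passing_outs_map.end() &&
         iter->second.count(output_name) > 0;
}
// Usage: IsOutputPassedFromPython("sgd", "ParamOut") is true, since sgd
// updates its input param in place and therefore receives ParamOut from
// Python.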

// NOTE(pangyoki): Tensor View Strategy.
// For these ops, a new output varbase is created, and it reuses the input
// varbase's allocation.
// The map key is the view op name; the value is a pair that gives the
// mapping between the input and output varbase (a lookup sketch follows
// the map).
std::map<std::string, std::pair<std::string, std::string>> view_op_map = {
    {"squeeze2", {"X", "Out"}},  // "X" -> "Out"
    {"unsqueeze2", {"X", "Out"}},
    {"reshape2", {"X", "Out"}},
    {"flatten_contiguous_range", {"X", "Out"}},
};
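
// A minimal lookup sketch (hypothetical helper) of reading the view
// relationship: for a view op, the returned pair names the input slot whose
// allocation the output varbase reuses, e.g. {"X", "Out"} for reshape2
// means Out shares X's allocation.
inline bool GetViewInOutPair(const std::string& op_type,
                             std::pair<std::string, std::string>* in_out) {
  auto iter = view_op_map.find(op_type);
  if (iter == view_op_map.end()) return false;
  *in_out = iter->second;
  return true;
}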

// NOTE(pangyoki): Special inplace ops that are temporarily not supported.
// The inputs and outputs of some inplace ops are special, e.g., they have
// duplicate inputs. These inplace ops have no usage scenarios and are
// temporarily not supported.
std::set<std::string> special_inplace_op_set = {
    "sum",     // `sum` op has duplicate input
    "assign",  // output of `assign` op is in `op_passing_outs_map`
};

// NOTE(pangyoki): Special no_need_buffer ops that are temporarily not
// supported.
// The sequence_conv op raises an error when getting its no_need_buffer
// info during compiling.
std::set<std::string> special_no_need_buffer_op_set = {
    "sequence_conv",
};