# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle.fluid.framework as framework
from paddle.fluid import core


# collect original ops: ops which have both an inference and a grad definition
def get_original_ops():
    """Return the names of the forward ("original") ops to process.

    An op is selected when the op collection reported by
    ``core.op_supported_infos`` also contains a matching ``<op>_grad``
    entry, or when the op is listed in ``necessary_ops``. Ops whose name
    ends with ``_grad_grad`` are not counted as grad ops.

    Two summary lines (number of grad ops, number of selected ops) are
    printed as a side effect.

    Returns:
        list[str]: the selected op names, in the iteration order of the
        op collection.
    """
    all_ops, _, _ = core.op_supported_infos('CPU', core.VarDesc.VarType.FP16)
    # Ops that are always kept even without a matching grad op.
    necessary_ops = ["scale"]

    grad_ops = [
        op for op in all_ops
        if op.endswith("_grad") and not op.endswith("_grad_grad")
    ]
    # Set copy so the membership test below is O(1) per op rather than a
    # scan over the whole grad-op list.
    grad_op_names = set(grad_ops)

    original_ops = [
        op for op in all_ops
        if (op + "_grad") in grad_op_names or op in necessary_ops
    ]

    print("Grad ops num: " + str(len(grad_ops)))
    print("Responded original ops num: " + str(len(original_ops)))
    return original_ops


# functions of parsing Paddle Proto

# Top-level keys of the per-op dictionary built by get_op_desc().
INPUTS = "Inputs"
OUTPUTS = "Outputs"
ATTRS = "Attrs"
COMMENT = "Comment"

# Per-variable flag keys filled in by get_vars_info().
DUPLICABLE = "duplicable"
INTERMEDIATE = "intermediate"
DISPENSABLE = "dispensable"

# Per-attribute keys filled in by get_attrs_info().
TYPE = "type"
GENERATED = "generated"
DEFAULT_VALUE = "default_value"

# Flag keys recorded for both variables and attributes.
EXTRA = "extra"
QUANT = "quant"


def get_attr_default_value(op_name):
    """Fetch the default attribute values registered for ``op_name``.

    The name is encoded to bytes before it is handed to
    ``core.get_op_attrs_default_value``; that call's result is returned
    unchanged.
    """
    encoded_name = op_name.encode()
    return core.get_op_attrs_default_value(encoded_name)
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109


def get_vars_info(op_vars_proto):
    """Summarise the flags of every variable proto in ``op_vars_proto``.

    Returns a dict keyed by variable name; each value is a dict holding
    the DUPLICABLE, DISPENSABLE, INTERMEDIATE, EXTRA and QUANT flags read
    from the corresponding proto fields.
    """
    return {
        str(proto.name): {
            DUPLICABLE: proto.duplicable,
            DISPENSABLE: proto.dispensable,
            INTERMEDIATE: proto.intermediate,
            EXTRA: proto.extra,
            QUANT: proto.quant,
        }
        for proto in op_vars_proto
    }


def get_attrs_info(op_proto, op_attrs_proto):
    """Summarise every attribute proto in ``op_attrs_proto``.

    Returns a dict keyed by attribute name. Each value records the
    attribute's TYPE, GENERATED, EXTRA and QUANT fields plus its
    DEFAULT_VALUE, which is looked up by name in the defaults obtained
    for ``op_proto.type`` and is ``None`` when no default is registered.
    """
    defaults = get_attr_default_value(op_proto.type)
    summary = {}
    for proto in op_attrs_proto:
        key = str(proto.name)
        entry = {TYPE: proto.type, GENERATED: proto.generated}
        if key in defaults:
            entry[DEFAULT_VALUE] = defaults[key]
        else:
            entry[DEFAULT_VALUE] = None
        entry[EXTRA] = proto.extra
        entry[QUANT] = proto.quant
        summary[key] = entry
    return summary


def get_op_desc(op_proto):
    """Build the description dictionary for one op proto.

    The result maps INPUTS / OUTPUTS to the variable summaries of the
    proto's inputs / outputs, ATTRS to its attribute summaries, and
    COMMENT to the proto's comment text.
    """
    return {
        INPUTS: get_vars_info(op_proto.inputs),
        OUTPUTS: get_vars_info(op_proto.outputs),
        ATTRS: get_attrs_info(op_proto, op_proto.attrs),
        COMMENT: op_proto.comment,
    }


def get_all_ops_desc():
    """Return a dict mapping each op type name to its description.

    Op protos come from ``framework.get_all_op_protos()``; each one is
    converted with ``get_op_desc``.
    """
    return {
        str(proto.type): get_op_desc(proto)
        for proto in framework.get_all_op_protos()
    }


# function to generate the C++ header with op input/output index maps
def _visible_var_names(vars_info):
    """Return the names in ``vars_info`` that are neither extra nor intermediate."""
    return [
        name for name, info in vars_info.items()
        if not info[EXTRA] and not info[INTERMEDIATE]
    ]


def _cpp_index_map_str(prefix, ops_vars_map):
    """Render ``{op: [var, ...]}`` as a C++ nested-map initializer.

    ``prefix`` is everything up to and including the opening ``{`` line of
    the outer map. Each variable name is paired with its position in the
    list. An op with an empty list is rendered as ``{"op", {}}`` so the
    initializer stays well-formed.
    """
    entries = []
    for op_name, var_names in ops_vars_map.items():
        pairs = ", ".join('{"' + var_name + '", ' + str(index) + '}'
                          for index, var_name in enumerate(var_names))
        entries.append('    {"' + op_name + '", {' + pairs + '}}')
    return prefix + ",\n".join(entries) + "\n};"


def generate_all_ops_inputs_outputs_map(op_descs):
    """Write the C++ header listing input/output positions for each op.

    For every op in ``op_descs`` that ``get_original_ops()`` selects, the
    non-extra, non-intermediate input and output names are collected and
    numbered in iteration order. The two resulting maps
    (``pd_dialect_inputs_info_map_`` and ``pd_dialect_outputs_info_map_``)
    are written to ``../../paddle/infrt/dialect/pd/common/pd_ops_info.h``;
    the path is relative to the current working directory.

    Args:
        op_descs: dict mapping op type name to the description dictionary
            produced by ``get_op_desc``.
    """
    # 1. Collect input and output name information of each Op
    original_ops_ = set(get_original_ops())
    ops_inputs_map = {}
    ops_outputs_map = {}
    for op_type, op_proto in op_descs.items():
        if op_type not in original_ops_:
            continue
        ops_inputs_map[op_type] = _visible_var_names(op_proto[INPUTS])
        ops_outputs_map[op_type] = _visible_var_names(op_proto[OUTPUTS])

    # 2. Generate Cpp style map str
    map_type = "const std::unordered_map<std::string, std::unordered_map<std::string, uint8_t>> "
    cpp_style_ops_inputs_map_str = _cpp_index_map_str(
        "#include <unordered_map>\n#include <vector>\n#include <string>\n" +
        map_type + "pd_dialect_inputs_info_map_  = {\n", ops_inputs_map)
    cpp_style_ops_outputs_map_str = _cpp_index_map_str(
        map_type + "pd_dialect_outputs_info_map_  = {\n", ops_outputs_map)

    # 3. Write to header file
    dst_head_file = "../../paddle/infrt/dialect/pd/common/pd_ops_info.h"
    with open(dst_head_file, 'w') as ops_inputs_outputs_head_file:
        ops_inputs_outputs_head_file.write(cpp_style_ops_inputs_map_str)
        ops_inputs_outputs_head_file.write("\n\n")
        ops_inputs_outputs_head_file.write(cpp_style_ops_outputs_map_str)


# function to build the constraint list of an op definition
def get_constraint(op_type, op_proto):
    """Return the comma-separated constraint list for one op definition.

    The list always starts with ``NoSideEffect``. When more than one
    input is dispensable (ignoring extra and intermediate inputs),
    ``AttrSizedOperandSegments`` is appended.

    Args:
        op_type: op type name. Not used by the computation; kept so the
            existing call signature stays unchanged.
        op_proto: description dictionary of the op, as built by
            ``get_op_desc``.

    Returns:
        str: the constraint names joined by ``", "``.
    """
    # The flags are the boolean fields stored by get_vars_info(), so plain
    # truthiness is used rather than comparing against True.
    optional_input_num_ = sum(
        1 for info in op_proto[INPUTS].values()
        if not info[EXTRA] and not info[INTERMEDIATE] and info[DISPENSABLE])

    constraint = "NoSideEffect"
    # NOTE(review): presumably required because each dispensable input is
    # emitted as an Optional<> operand in the dialect file -- confirm
    # against the MLIR ODS rules.
    if optional_input_num_ > 1:
        constraint += ", AttrSizedOperandSegments"
    return constraint


# function to generate the paddle op dialect file
# Attribute type code (the TYPE entry of an attribute description) -> name
# of the MLIR attribute class used in the generated dialect.
# unsupported:   BLOCK = 8;  BLOCKS = 10;
_ATTR_MLIR_CONVERTER = {
    0: 'SI32Attr',
    1: 'F32Attr',
    2: 'StrAttr',
    3: 'I32ArrayAttr',
    4: 'F32ArrayAttr',
    5: 'StrArrayAttr',
    6: 'BoolAttr',
    7: 'BoolArrayAttr',
    9: 'SI64Attr',
    11: 'I64ArrayAttr',
}

# Attribute classes whose default value is written as a list.
_ARRAY_ATTR_TYPES = ('I32ArrayAttr', 'F32ArrayAttr', 'StrArrayAttr',
                     'BoolArrayAttr', 'I64ArrayAttr')

# Attribute classes that are emitted even when no default value exists.
_ATTR_TYPES_EMITTED_WITHOUT_DEFAULT = ('StrAttr', ) + _ARRAY_ATTR_TYPES


def _default_value_text(mlir_type, default_value):
    """Return ``default_value`` formatted for a ``DefaultValuedAttr`` entry."""
    text = str(default_value)
    if mlir_type in _ARRAY_ATTR_TYPES:
        # Python list brackets become brace-initializer braces.
        text = text.replace('[', '{').replace(']', '}')
    if mlir_type in ('BoolAttr', 'BoolArrayAttr'):
        # "True"/"False" -> "true"/"false"
        text = text.lower()
    elif mlir_type in ('StrAttr', 'StrArrayAttr'):
        # Single quotes from str() become escaped double quotes.
        text = text.replace('\'', '\\\"')
        if mlir_type == "StrAttr":
            text = '\\\"' + text + '\\\"'
    return text


def _input_argument_text(name, info):
    """Return the ``(ins ...)`` entry for one input variable."""
    tensor_type = "PD_Tensor_Array" if info[DUPLICABLE] else "PD_Tensor"
    if info[DISPENSABLE]:
        tensor_type = "Optional<" + tensor_type + ">"
    return " " + tensor_type + ":$" + name + ","


def _attr_argument_text(op_type, name, info):
    """Return the ``(ins ...)`` entry for one attribute.

    Returns "" when nothing is emitted. An attribute whose type code is
    not in ``_ATTR_MLIR_CONVERTER`` is reported with a printed message.
    """
    type_code = info[TYPE]
    default_value = info[DEFAULT_VALUE]

    if type_code not in _ATTR_MLIR_CONVERTER:
        label = "Error:" if default_value is not None else " ouch Error:"
        print(label + op_type + ":" + name + ":" + str(type_code))
        return ""

    mlir_type = _ATTR_MLIR_CONVERTER[type_code]
    if default_value is not None:
        return (" DefaultValuedAttr<" + mlir_type + ", \"" +
                _default_value_text(mlir_type, default_value) + "\">:$" +
                name + ",")
    # NOTE(review): attributes without a default value whose class is not
    # listed here (e.g. the scalar numeric and bool classes) produce no
    # argument at all -- confirm that this is intended.
    if mlir_type in _ATTR_TYPES_EMITTED_WITHOUT_DEFAULT:
        return mlir_type + ":$" + name + ","
    return ""


def _arguments_text(op_type, op_proto, skipped_attr_list):
    """Return the ``let arguments = (ins ...)`` line, or "" if the op has neither inputs nor attrs."""
    inputs = op_proto[INPUTS]
    attrs = op_proto[ATTRS]
    if not inputs and not attrs:
        return ""

    parts = ["  let arguments = (ins "]
    # inputs
    for name, info in inputs.items():
        if info[EXTRA] or info[INTERMEDIATE]:
            continue
        parts.append(_input_argument_text(name, info))
    # attributes
    for name, info in attrs.items():
        if info[EXTRA] or name in skipped_attr_list:
            continue
        parts.append(_attr_argument_text(op_type, name, info))
    # Drop the trailing "," (or the trailing space when nothing was added).
    return "".join(parts)[:-1] + ");\n"


def _results_text(op_proto):
    """Return the ``let results = (outs ...)`` line, or "" if the op has no outputs."""
    outputs = op_proto[OUTPUTS]
    if not outputs:
        return ""

    entries = []
    for name, info in outputs.items():
        if info[EXTRA] or info[INTERMEDIATE]:
            continue
        tensor_type = "PD_Tensor_Array" if info[DUPLICABLE] else "PD_Tensor"
        entries.append(tensor_type + ":$" + name)
    return "\n  let results = (outs " + ",".join(entries) + ");\n"


def convert_op_proto_into_mlir(op_descs):
    """Generate the TableGen dialect file for the Paddle ops.

    One ``def PD_<Op>Op`` record (summary, description, arguments and
    results) is produced for every op in ``op_descs`` that is selected by
    ``get_original_ops()`` and is not in the skip list. The complete text
    is written in a single pass to
    ``../../paddle/infrt/dialect/pd/ir/pd_ops.td``; the path is relative
    to the current working directory. The number of skipped and generated
    ops is printed at the end.

    Args:
        op_descs: dict mapping op type name to the description dictionary
            produced by ``get_op_desc``.
    """
    dst_dialect_file = "../../paddle/infrt/dialect/pd/ir/pd_ops.td"

    # 1. Head files
    comment_ = (
        "/*===- TableGen'source file -----------------------------------------------===*\\\n"
        "|*                                                                            *|\n"
        "|* Op Definitions                                                             *|\n"
        "|*                                                                            *|\n"
        "|* Automatically generated file, do not edit!                                 *|\n"
        "|* Generated by tools/infrt/generate_pd_op_dialect_from_paddle_op_maker.py    *|\n"
        "|*                                                                            *|\n"
        "\\*===----------------------------------------------------------------------===*/\n"
    )

    lines = [
        '#ifndef PD_OPS',
        '#define PD_OPS',
        'include "mlir/Interfaces/InferTypeOpInterface.td"',
        'include "mlir/Interfaces/LoopLikeInterface.td"',
        'include "mlir/IR/OpBase.td"',
        'include "paddle/infrt/dialect/pd/ir/pd_op_base.td"',
        '',
    ]

    # The whole file is assembled in memory and written once at the end.
    chunks = [comment_ + "\n".join(lines)]

    # 2. Op dialect
    # skip list ( ops whose dialect can not be generated automatically will be recorded here)
    skipped_op_list = [
        "cos_sim", "fused_embedding_seq_pool", "cosh", "kron", "recurrent",
        "while", "conditional_block", "set_value", "run_program"
    ]
    skipped_attr_list = [
        "trainable_statistics", "use_global_stats", "is_test", "use_quantizer"
    ]

    original_ops_ = set(get_original_ops())
    automatically_generated_op_dialect = []
    for op_type, op_proto in op_descs.items():
        if (op_type in skipped_op_list) or (op_type not in original_ops_):
            continue
        automatically_generated_op_dialect.append(op_type)
        constraint_ = get_constraint(op_type, op_proto)

        # 2.1 OpDef
        chunks.append('def PD_' + op_type.capitalize() + 'Op : PD_Op<"' +
                      op_type + '", [' + constraint_ + ']> {\n')
        chunks.append('  let summary = "' + op_type + ' op";\n')

        # 2.2 Description: the op comment, indented line by line
        contents = "".join("    " + line_ + "\n"
                           for line_ in op_proto[COMMENT].split("\n"))
        chunks.append("  let description = [{\n" + contents + "  }];\n")

        # 2.3 arguments info
        chunks.append(_arguments_text(op_type, op_proto, skipped_attr_list))

        # 2.4 results info
        chunks.append(_results_text(op_proto))
        chunks.append("}\n")

    chunks.append("\n#endif  // PD_OPS")

    with open(dst_dialect_file, 'w') as ops_mlir_file:
        ops_mlir_file.write("".join(chunks))

    print("Skipped ops num: " + str(len(skipped_op_list)))
    print("Automatically generated op dialects num: " +
          str(len(automatically_generated_op_dialect)))
# script entry point


if __name__ == "__main__":
    # Parse the op protos once and hand the same description dictionary
    # to both generators, header first and dialect file second.
    all_op_protos_dict = get_all_ops_desc()
    for generate in (generate_all_ops_inputs_outputs_map,
                     convert_op_proto_into_mlir):
        generate(all_op_protos_dict)