From f3cac96659d5f8a5a924609acc5fbe60f34279f7 Mon Sep 17 00:00:00 2001 From: Yiqun Liu Date: Thu, 9 Mar 2023 17:04:26 +0800 Subject: [PATCH] Add the collect and print of kernel registry information in op benchmark ci. (#51309) * Add the collect and print of kernel registry information in op benchmark ci. * Little change to test the ci. * Remove the redundant function. * Move the collect of kernel registry information to the end of ci. --- .../phi/kernels/funcs/transpose_function.cu.h | 4 +- tools/ci_op_benchmark.sh | 7 + tools/parse_kernel_info.py | 182 ++++++++++++++++++ 3 files changed, 191 insertions(+), 2 deletions(-) create mode 100644 tools/parse_kernel_info.py diff --git a/paddle/phi/kernels/funcs/transpose_function.cu.h b/paddle/phi/kernels/funcs/transpose_function.cu.h index 9f746349a67..f49a3785622 100644 --- a/paddle/phi/kernels/funcs/transpose_function.cu.h +++ b/paddle/phi/kernels/funcs/transpose_function.cu.h @@ -966,8 +966,8 @@ struct PermuteParams { IdxAndOffsetHelper dst_index_helper; int perm[Rank]{}; - explicit PermuteParams(const std::vector& dims, - const std::vector& perm_) { + PermuteParams(const std::vector& dims, + const std::vector& perm_) { IndexT dst_dims[Rank]; IndexT src_dims[Rank]; for (auto i = 0; i < Rank; ++i) { diff --git a/tools/ci_op_benchmark.sh b/tools/ci_op_benchmark.sh index b7a6fcece3c..31a23036fe4 100644 --- a/tools/ci_op_benchmark.sh +++ b/tools/ci_op_benchmark.sh @@ -34,6 +34,12 @@ function LOG { echo "[$0:${BASH_LINENO[0]}] $*" >&2 } +function collect_kernel_registry_info { + LOG "[INFO] Collect kernel registry info ..." + python ${PADDLE_ROOT}/tools/parse_kernel_info.py + [ $? -ne 0 ] && LOG "[FATAL] Collect kernel registry info fail." +} + # Limit cu file directory function match_cu_file_directory { LOG "[INFO] run function match_cu_file_directory" @@ -299,6 +305,7 @@ function gpu_op_benchmark { run_op_benchmark_test summary_problems LOG "[INFO] Op benchmark run success and no error!" 
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Collect and print a summary of the kernel registry of a Paddle build.

Run by the op-benchmark CI (tools/ci_op_benchmark.sh) as:
    python tools/parse_kernel_info.py
"""


class KernelInfo:
    """Records which dtypes are registered for a single op/kernel type."""

    def __init__(self, op_type):
        # Name of the operator / kernel this entry describes.
        self.op_type = op_type
        # dtype names (e.g. "float32", "bfloat16") registered for op_type.
        self.supported_dtypes = set()

    def parse_phi_dtypes(self, registered_info_list, device="GPU"):
        """Collect dtypes from phi-style registry strings.

        Each entry looks like '(GPU, Undefined(AnyLayout), float32)'.
        Only entries whose backend field contains `device` are kept; for
        device="GPU" the substring test deliberately also matches the
        "GPUDNN" backend.
        """
        assert isinstance(registered_info_list, list)
        assert device in ["CPU", "GPU"]

        for kernel_str in registered_info_list:
            fields = kernel_str.replace("(", "").replace(")", "").split(",")
            # fields[0] is the backend ("CPU", "GPU", "GPUDNN", ...);
            # fields[-1] is the dtype, possibly with a leading space.
            if device in fields[0]:
                self.supported_dtypes.add(fields[-1].replace(" ", ""))

    def parse_fluid_dtypes(self, registered_info_list, device="gpu"):
        """Collect dtypes from fluid-style registry strings.

        Each entry looks like
        '{data_type[::paddle::platform::bfloat16]; data_layout[...];
          place[Place(gpu:0)]; library_type[PLAIN]}'.
        """
        assert isinstance(registered_info_list, list)
        assert device in ["cpu", "gpu"]

        for kernel_str in registered_info_list:
            fields = kernel_str.split(";")
            # fields[2] is the place, e.g. " place[Place(gpu:0)]".
            if "place" in fields[2] and device in fields[2]:
                assert "data_type" in fields[0]
                dtype_str = (
                    fields[0]
                    .replace("{data_type[", "")
                    .replace("::paddle::platform::", "")
                    .replace("]", "")
                )
                self.supported_dtypes.add(dtype_str)


class KernelRegistryStatistics:
    """Aggregates per-dtype op counts across the whole registry."""

    def __init__(self):
        # "all" counts every op seen; the dtype keys count ops that
        # registered a kernel for that dtype.
        self.num_ops_for_dtypes = {
            "all": 0,
            "float32": 0,
            "float16": 0,
            "bfloat16": 0,
        }

    def update(self, supported_dtypes):
        """Fold one op's supported dtypes into the counters.

        Fluid reports single precision as "float" while phi reports
        "float32"; both are counted under "float32".
        """
        # Bug fix: the original tested `dtype in ["float", "float32"]`
        # while iterating the counter's own keys, so "float32" was
        # incremented for EVERY op and always equalled the "all" count
        # (which also skewed the float16/bfloat16 percentages).
        if "float" in supported_dtypes or "float32" in supported_dtypes:
            self.num_ops_for_dtypes["float32"] += 1
        for dtype in ("float16", "bfloat16"):
            if dtype in supported_dtypes:
                self.num_ops_for_dtypes[dtype] += 1
        self.num_ops_for_dtypes["all"] += 1

    def __str__(self):
        # Low-precision coverage is reported as a percentage of the ops
        # that support float32.
        num_floats = int(self.num_ops_for_dtypes["float32"])
        parts = []
        for dtype, num in self.num_ops_for_dtypes.items():
            text = "{}: {:4d}".format(dtype, num)
            if dtype in ["float16", "bfloat16"]:
                percent = float(num) / float(num_floats) if num_floats != 0 else 0
                text += "({:.2f}%)".format(percent * 100)
            parts.append(text)
        return "{ " + " ".join(parts) + " }"


def parse_paddle_kernels(lib="phi", kernel_type="function", print_detail=False):
    """Query one kernel registry and return its KernelRegistryStatistics.

    Args:
        lib: "phi" or "fluid" — which registry to query.
        kernel_type: for lib="phi": "function" | "structure" | "all";
            for lib="fluid": "fluid" | "phi" | "all".
        print_detail: when True, also print a per-op dtype table.
    """
    # Imported lazily so the pure-Python parsing/statistics helpers above
    # stay importable (and unit-testable) without a Paddle installation.
    import paddle

    assert lib in ["fluid", "phi"]
    if lib == "phi":
        assert kernel_type in ["function", "structure", "all"]
        kernel_infos = paddle.fluid.core._get_registered_phi_kernels(kernel_type)
    else:
        assert kernel_type in ["fluid", "phi", "all"]
        kernel_infos = paddle.fluid.core._get_all_register_op_kernels(kernel_type)

    stats = KernelRegistryStatistics()
    kernel_info_dict = {}
    max_op_type_length = 0
    for op_type, registered_info in kernel_infos.items():
        info = KernelInfo(op_type)
        if lib == "phi":
            info.parse_phi_dtypes(registered_info, device="GPU")
        else:
            info.parse_fluid_dtypes(registered_info, device="gpu")
        kernel_info_dict[op_type] = info
        max_op_type_length = max(max_op_type_length, len(info.op_type))
        stats.update(info.supported_dtypes)

    if print_detail:
        print(
            "==================== lib={}, kernel_type={} ====================".format(
                lib, kernel_type
            )
        )
        print(
            "{} : {}".format(
                "op_type".ljust(max_op_type_length + 4),
                "supported_dtypes for GPU",
            )
        )
        for _, info in sorted(kernel_info_dict.items()):
            print(
                "{} : {}".format(
                    info.op_type.ljust(max_op_type_length + 4),
                    info.supported_dtypes,
                )
            )
        print("")
    return stats


def main(lib):
    """Print registry summaries for one lib ("fluid" or "phi")."""
    assert lib in ["fluid", "phi"]

    # Flip to True locally to dump the per-op dtype table for the "all"
    # query.
    print_detail = False
    if lib == "phi":
        function_stats = parse_paddle_kernels(lib, "function", print_detail=False)
        structure_stats = parse_paddle_kernels(lib, "structure", print_detail=False)
        all_stats = parse_paddle_kernels(lib, "all", print_detail=print_detail)
        print(
            "================================== phi kernels summary =================================="
        )
        print("phi function kernels : {}".format(function_stats))
        print("phi structure kernels : {}".format(structure_stats))
        print("phi all kernels : {}".format(all_stats))
        print("")
    else:
        fluid_stats = parse_paddle_kernels(lib, "fluid", print_detail=False)
        phi_stats = parse_paddle_kernels(lib, "phi", print_detail=False)
        all_stats = parse_paddle_kernels(lib, "all", print_detail=print_detail)
        print(
            "================================== fluid operators summary =================================="
        )
        print("fluid operators : {}".format(fluid_stats))
        print("phi operators : {}".format(phi_stats))
        print("all operators : {}".format(all_stats))
        print("")


if __name__ == "__main__":
    # Guarded so importing this module (e.g. from tooling or tests) does
    # not trigger the full registry walk; the CI runs it as a script.
    main(lib="fluid")
    main(lib="phi")