diff --git a/.gitignore b/.gitignore index 13f6a427ac6e13bb2495168c4d6c9a04212ca87f..7d20fce7c9938cb003dec09f560e652135e68cf1 100644 --- a/.gitignore +++ b/.gitignore @@ -52,5 +52,6 @@ paddle/infrt/dialect/pd_ops_info.h .lit_test_times.txt paddle/infrt/tests/dialect/Output paddle/infrt/tests/lit.cfg.py +paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.cc paddle/fluid/pybind/eager_final_state_op_function_impl.h paddle/fluid/pybind/tmp_eager_final_state_op_function_impl.h diff --git a/paddle/infrt/host_context/value.h b/paddle/infrt/host_context/value.h index f623e141512ce4777de6f8a9232db002a08facf0..f0478583f7cfd5d372d83c36e51e13a3209e8306 100644 --- a/paddle/infrt/host_context/value.h +++ b/paddle/infrt/host_context/value.h @@ -28,13 +28,18 @@ #include "paddle/infrt/tensor/dense_tensor_view.h" #include "paddle/infrt/tensor/tensor_map.h" #include "paddle/infrt/tensor/tensor_shape.h" -#include "paddle/pten/core/meta_tensor.h" #ifdef INFRT_WITH_PTEN #include "paddle/infrt/backends/host/pten_allocator.h" #include "paddle/infrt/backends/host/pten_context.h" -#include "paddle/pten/backends/cpu/cpu_context.h" +#include "paddle/pten/backends/all_context.h" +#include "paddle/pten/common/backend.h" +#include "paddle/pten/common/data_type.h" +#include "paddle/pten/common/layout.h" +#include "paddle/pten/common/scalar.h" +#include "paddle/pten/common/scalar_array.h" #include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/core/meta_tensor.h" #endif namespace infrt { @@ -42,31 +47,40 @@ namespace host_context { struct MlirFunctionExecutable; -using ValueVariantType = Variant, + paddle::experimental::ScalarBase, + paddle::experimental::ScalarArrayBase, + std::vector, + pten::MetaConfig, + paddle::experimental::Backend, + paddle::experimental::DataLayout, + paddle::experimental::DataType, #endif - std::vector, - std::vector, - std::vector, - std::vector, - std::vector>; + std::vector, + std::vector, + std::vector, + std::vector, + std::vector>; //! 
Copy content from \param from to \param to. void CopyTo(const Value& from, Value* to); diff --git a/paddle/infrt/kernel/pten/CMakeLists.txt b/paddle/infrt/kernel/pten/CMakeLists.txt index 65c10b0b15f8dc32b9b167cf24bc02189b58c084..88cb09211e5ae9b9a135214fff16b37b444a529a 100644 --- a/paddle/infrt/kernel/pten/CMakeLists.txt +++ b/paddle/infrt/kernel/pten/CMakeLists.txt @@ -11,9 +11,21 @@ gather_srcs(infrt_src SRCS allocator_kernels.cc ) +set(infrt_register_pten_kernels_gen_source_file ${CMAKE_SOURCE_DIR}/paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.cc) +set(infrt_register_pten_kernels_gen_file ${CMAKE_SOURCE_DIR}/tools/infrt/get_pten_kernel_function.sh) +set(wrapped_infermeta_header_file ${CMAKE_SOURCE_DIR}/paddle/pten/infermeta/generated.h) +set(wrapped_infermeta_source_file ${CMAKE_SOURCE_DIR}/paddle/pten/infermeta/generated.cc) + +add_custom_command( + OUTPUT ${infrt_register_pten_kernels_gen_source_file} + COMMAND sh ${infrt_register_pten_kernels_gen_file} + DEPENDS ${wrapped_infermeta_header_file} ${wrapped_infermeta_source_file} + COMMENT "infrt generate ${infrt_register_pten_kernels_gen_source_file}" + VERBATIM) + cc_library(infrt_naive SRCS infershaped/infershaped_kernel_launcher.cc infershaped/infershaped_kernel_launchers.cc - ) + DEPS pten wrapped_infermeta) cc_test_tiny(test_infrt_infershape_launchers SRCS infershaped/infershape_launchers_test.cc DEPS infrt) diff --git a/paddle/infrt/kernel/pten/infershaped/infershape_launchers_test.cc b/paddle/infrt/kernel/pten/infershaped/infershape_launchers_test.cc index 64b99110d94c7a5e9a25f175194ea2b11e98b5f0..93836cf5716d64da3f0f44c2e0031bda5436cd2c 100644 --- a/paddle/infrt/kernel/pten/infershaped/infershape_launchers_test.cc +++ b/paddle/infrt/kernel/pten/infershaped/infershape_launchers_test.cc @@ -17,6 +17,10 @@ #include "paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launcher.h" #include "paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.h" #include 
"paddle/infrt/kernel/pten/infershaped/infershaped_utils.h" +#include "paddle/pten/backends/cpu/cpu_context.h" +#include "paddle/pten/common/place.h" +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/core/meta_tensor.h" namespace infrt { namespace kernel { @@ -33,23 +37,62 @@ TEST(utils, registry) { CHECK_EQ(count, 2U); } +class FancyAllocator : public pten::Allocator { + public: + static void Delete(pten::Allocation* allocation) { + ::operator delete(allocation->ptr()); + } + + AllocationPtr Allocate(size_t bytes_size) override { + void* data = ::operator new(bytes_size); + auto* allocation = new pten::Allocation(data, bytes_size, pten::CPUPlace()); + return AllocationPtr(allocation, Delete); + } +}; + TEST(ElementwiseAdd, launcher_registry) { host_context::KernelRegistry registry; RegisterInferShapeLaunchers(®istry); - ASSERT_EQ(registry.size(), 1UL); - auto creator = registry.GetKernel("elementwise_add"); + ASSERT_GE(registry.size(), 1UL); + auto creator = registry.GetKernel("add.cpu.any.fp32"); + + const pten::DDim dims({1, 2}); + const pten::DataType dtype{pten::DataType::FLOAT32}; + const pten::DataLayout layout{pten::DataLayout::NHWC}; + const pten::LoD lod{}; + pten::DenseTensorMeta meta(dtype, dims, layout, lod); + + auto fancy_allocator = std::unique_ptr(new FancyAllocator); + auto* alloc = fancy_allocator.get(); - ::pten::CPUContext ctx{}; - ::pten::DenseTensor a{}; - ::pten::DenseTensor b{}; - ::pten::DenseTensor c{}; + pten::DenseTensor a(alloc, meta); + pten::DenseTensor b(alloc, meta); + pten::DenseTensor c(alloc, meta); + + auto place = pten::CPUPlace(); + float* a_data = a.mutable_data(place); + float* b_data = b.mutable_data(place); + float* c_data = c.mutable_data(place); + for (size_t i = 0; i < 2; ++i) { + a_data[i] = 1.f; + b_data[i] = 2.f; + } + + pten::CPUContext context; + context.SetAllocator(alloc); + context.Init(); host_context::KernelFrameBuilder kernel_frame_builder; - kernel_frame_builder.AddArgument(new 
host_context::Value(std::move(ctx))); + kernel_frame_builder.AddArgument(new host_context::Value(std::move(context))); kernel_frame_builder.AddArgument(new host_context::Value(std::move(a))); kernel_frame_builder.AddArgument(new host_context::Value(std::move(b))); kernel_frame_builder.SetResults({new host_context::Value(std::move(c))}); + creator(&kernel_frame_builder); + + for (size_t i = 0; i < 2; ++i) { + CHECK_EQ(c_data[i], 3.f); + } } } // namespace kernel diff --git a/paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launcher.cc b/paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launcher.cc index 80f8bae4018cbde3dc3a8d2c1331691041425327..4d91cda04152f14927d2ddb5565a0626837ea16e 100644 --- a/paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launcher.cc +++ b/paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launcher.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launcher.h" +#include "paddle/pten/core/dense_tensor.h" namespace infrt { namespace kernel { @@ -26,6 +27,9 @@ void InferShapedKernelLauncher::CreateKernelFrameForInferShape( values.emplace_back( ::pten::MetaTensor{&value->get<::pten::DenseTensor>()}); infershape_kernel_frame_builder.AddArgument(values.back().get()); + } else if (value->is_type()) { + values.emplace_back(pten::MetaTensor{&value->get()}); + infershape_kernel_frame_builder.AddArgument(values.back().get()); } else { infershape_kernel_frame_builder.AddArgument(value); } diff --git a/paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.cc b/paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.cc deleted file mode 100644 index 23d4f919af0571d566d37b618711708d48b88365..0000000000000000000000000000000000000000 --- a/paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.cc +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.h" -#include "paddle/infrt/kernel/pten/infershaped/elementwise_add.h" - -namespace infrt { -namespace kernel { - -void RegisterInferShapeLaunchers(host_context::KernelRegistry* registry) { - registry->AddKernel( - "elementwise_add", - std::bind(&KernelLauncherFunc, - KernelLauncher(), - std::placeholders::_1)); -} - -} // namespace kernel -} // namespace infrt diff --git a/paddle/infrt/kernel/pten/infershaped/infershaped_utils.h b/paddle/infrt/kernel/pten/infershaped/infershaped_utils.h index aa5e900b8b26a99aa0401deb61960bf3ec923e81..e6e3091bd0e9f545d663842f9824c26c40489bc5 100644 --- a/paddle/infrt/kernel/pten/infershaped/infershaped_utils.h +++ b/paddle/infrt/kernel/pten/infershaped/infershaped_utils.h @@ -16,6 +16,7 @@ #include #include "paddle/infrt/tensor/dense_host_tensor.h" +#include "paddle/pten/core/dense_tensor.h" namespace infrt { namespace kernel { diff --git a/paddle/infrt/kernel/pten/infershaped/elementwise_add.h b/paddle/infrt/kernel/pten/infershaped/pten_kernel_launcher.h similarity index 79% rename from paddle/infrt/kernel/pten/infershaped/elementwise_add.h rename to paddle/infrt/kernel/pten/infershaped/pten_kernel_launcher.h index 1d9d0106da539b4fd071b551dd693cdc6eaaf528..9a3e978e966b0702ef29623da6578a3858f8cc64 100644 --- a/paddle/infrt/kernel/pten/infershaped/elementwise_add.h +++ 
b/paddle/infrt/kernel/pten/infershaped/pten_kernel_launcher.h @@ -19,21 +19,9 @@ #include "paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launcher.h" #include "paddle/infrt/kernel/pten/infershaped/infershaped_utils.h" -// This file contains a example of the infershape ElementwiseAdd kernel. -// Some of the following code should be generated from PTEN by script. - namespace infrt { namespace kernel { -static void ElementwiseAddInferShape(const ::pten::MetaTensor& a, - const ::pten::MetaTensor& b, - ::pten::MetaTensor* c) {} - -static void ElementwiseAdd(const ::pten::CPUContext& /*Context*/, - const ::pten::DenseTensor& a, - const ::pten::DenseTensor& b, - ::pten::DenseTensor* c) {} - template ::Invoke(frame); } }; diff --git a/paddle/infrt/kernel/pten/registry.cc b/paddle/infrt/kernel/pten/registry.cc index 888992c47d968666bf531a751b85a996a77e270d..1b410b5fc400838dd0c5f97783a4d9fee650bb1d 100644 --- a/paddle/infrt/kernel/pten/registry.cc +++ b/paddle/infrt/kernel/pten/registry.cc @@ -22,7 +22,7 @@ #include "paddle/infrt/kernel/pten/allocator_kernels.h" #include "paddle/infrt/kernel/pten/context_kernels.h" #include "paddle/infrt/kernel/pten/dense_tensor_kernels.h" -#include "paddle/infrt/kernel/pten/infershaped/elementwise_add.h" +#include "paddle/infrt/kernel/pten/infershaped/pten_kernel_launcher.h" #include "paddle/pten/include/infermeta.h" #include "paddle/pten/include/kernels.h" #include "paddle/pten/kernels/matmul_kernel.h" diff --git a/tools/infrt/get_pten_kernel_function.sh b/tools/infrt/get_pten_kernel_function.sh old mode 100755 new mode 100644 index 0d787d9930b2c739733e8431eaccece88519248a..52abcf7d5559c83d2218fe480a9d4d7b0d8cf9f1 --- a/tools/infrt/get_pten_kernel_function.sh +++ b/tools/infrt/get_pten_kernel_function.sh @@ -22,14 +22,14 @@ set -e #step 1:get kernel registered info kernel_register_info_file=`mktemp` -PADDLE_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}")/../../" && pwd )" +PADDLE_ROOT="$( cd "$( dirname "$0" )/../../" && pwd )" unset 
GREP_OPTIONS && find ${PADDLE_ROOT}/paddle/pten/kernels -name "*.c*" \ - | xargs sed -e '/PT_REGISTER_\(GENERAL_\)\?KERNEL(/,/)/!d' \ - | awk 'BEGIN { RS="{" }{ gsub(/\n /,""); print $0 }' \ - | grep PT_REGISTER \ - | awk -F ",|\(" '{gsub(/ /,"");print $2, $3, $4, $5}' \ - | sort -u | awk '{gsub(/pten::/,"");print $0}' \ - | grep -v "_grad" > $kernel_register_info_file + | xargs sed -e '/PT_REGISTER_\(GENERAL_\)\?KERNEL(/,/)/!d' \ + | awk 'BEGIN { RS="{" }{ gsub(/\n /,""); print $0 }' \ + | grep PT_REGISTER \ + | awk -F ",|\(|\)" '{gsub(/ /,"");$1="";print}' \ + | sort -u | awk '{gsub(/pten::/,"");gsub(/paddle::platform::/,"");gsub(/dtype::/,"");gsub(/paddle::/,"");print $0}' \ + | grep -v "_grad" > $kernel_register_info_file #step 2:get simple general inferMeta function wrap info temp_path=`mktemp -d` @@ -49,4 +49,5 @@ grep PT_REGISTER_INFER_META_FN ${temp_path}/generate.cc \ python3 ${PADDLE_ROOT}/tools/infrt/get_pten_kernel_info.py \ --paddle_root_path ${PADDLE_ROOT} \ --kernel_info_file $kernel_register_info_file \ - --infermeta_wrap_file ${temp_path}/wrap_info.txt + --infermeta_wrap_file ${temp_path}/wrap_info.txt \ + --generate_file ${PADDLE_ROOT}/paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.cc diff --git a/tools/infrt/get_pten_kernel_info.py b/tools/infrt/get_pten_kernel_info.py index e311464130008e9c7815c028f69b2d29eef3b349..71c77785b82cde9f8337550115cd7277abf5e52e 100644 --- a/tools/infrt/get_pten_kernel_info.py +++ b/tools/infrt/get_pten_kernel_info.py @@ -17,6 +17,7 @@ import argparse import json import yaml +from typing import List, Dict, Any def parse_args(): @@ -25,17 +26,23 @@ def parse_args(): "--paddle_root_path", type=str, required=True, - help="root path of paddle src[WORK_PATH/Paddle] .") + help="root path of paddle src[WORK_PATH/Paddle].") parser.add_argument( "--kernel_info_file", type=str, required=True, - help="kernel info file generated by get_pten_kernel_function.sh .") + help="kernel info file generated by 
get_pten_kernel_function.sh.") parser.add_argument( "--infermeta_wrap_file", type=str, required=True, - help="inferMeta wrap info file .") + help="inferMeta wrap info file.") + parser.add_argument( + "--generate_file", + type=str, + required=True, + default="../paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.cc", + help="generated file.") args = parser.parse_args() return args @@ -76,10 +83,179 @@ def merge(infer_meta_data, kernel_data, wrap_data): return full_kernel_data +def gen_warn_info(): + return """// Generated by tools/infrt/gen_pten_kernel_register.py for infrt. +// DO NOT edit or include it within paddle. +""" + + +def gen_include_headers(): + return """ +#include "paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.h" +#include "paddle/infrt/kernel/pten/infershaped/pten_kernel_launcher.h" +#include "paddle/pten/backends/all_context.h" +#include "paddle/pten/include/kernels.h" +#include "paddle/pten/include/infermeta.h" +#include "paddle/pten/infermeta/generated.h" +""" + + +def gen_namespace(): + return (""" +namespace infrt { +namespace kernel { + +""", """ + +} // namespace kernel +} // namespace infrt +""") + + +def gen_context(val): + if val == "CPU": + return "pten::CPUContext" + # elif val == "GPU": + # return "pten::GPUContext" + # elif val == "XPU": + # return "pten::XPUContext" + else: + # raise Exception(f"Unknown context type {val}") + return "" + + +def gen_layout(val): + if val == "ALL_LAYOUT": + return 'any' + else: + # TODO(wilber): now only process ALL_LAYOUT + raise Exception(f"Unknown layout type {val}") + + +def gen_kernel_func(val, ctx_name, dtype_name): + if '<' in val and '>' in val: + st = val.index('<') + ed = val.index('>') + func_name = val[:st] + template_name = val[st + 1:ed] + if 'pten::' in template_name: + return "&pten::" + val + else: + return "&pten::" + func_name + "" + else: + return "&pten::" + val + "<" + dtype_name + ", " + ctx_name + ">" + + +def gen_dtype(vals: List[str]): + 
ir_dtypes, origin_dtypes = [], [] + for val in vals: + if val == "float": + ir_dtypes.append("fp32") + origin_dtypes.append("float") + elif val == "double": + ir_dtypes.append("fp64") + origin_dtypes.append("double") + elif val == "float16": + ir_dtypes.append("fp16") + origin_dtypes.append("paddle::experimental::float16") + elif val == "bfloat16": + ir_dtypes.append("bf16") + origin_dtypes.append("paddle::experimental::bfloat16") + elif val == "bool": + ir_dtypes.append("int1") + origin_dtypes.append("bool") + elif val == "int8_t": + ir_dtypes.append("int8") + origin_dtypes.append("int8_t") + elif val == "uint8_t": + ir_dtypes.append("uint8") + origin_dtypes.append("uint8_t") + elif val == "int16_t": + ir_dtypes.append("int16") + origin_dtypes.append("int16_t") + elif val == "int" or val == "int32_t": + ir_dtypes.append("int32") + origin_dtypes.append("int32_t") + elif val == "int64_t": + ir_dtypes.append("int64") + origin_dtypes.append("int64_t") + elif val == "complex" or val == "complex64": + ir_dtypes.append("complex64") + origin_dtypes.append("paddle::experimental::complex64") + elif val == "complex" or val == "complex128": + ir_dtypes.append("complex128") + origin_dtypes.append("paddle::experimental::complex128") + elif val == "ALL_DTYPE": + ir_dtypes.append("all") + origin_dtypes.append("all") + else: + if "VA_ARGS" in val: + continue + raise Exception(f"Unknown data type {val}") + return ir_dtypes, origin_dtypes + + +# TODO(wilber): Now only process CPUContext. +def gen_register_info(resources: List[List[str]]): + """ + resources: [['add', 'CPU', 'ALL_LAYOUT', 'AddKernel', 'float', 'double', '...'(variadic types), 'ElementwiseInferMeta'], ...] 
+ """ + res = "void RegisterInferShapeLaunchers(host_context::KernelRegistry* registry) {" + for item in resources: + # The output string is polluted by C++ macros, here the \ is removed + update_item = [v.strip('\\') for v in item] + + ctx_name = gen_context(update_item[1]) + if (ctx_name == ""): + continue + update_item[2] = gen_layout(update_item[2]) + ir_dtypes, origin_dtypes = gen_dtype(update_item[4:-1]) + infer_shape_func = "&pten::" + update_item[-1] + + if update_item[-1] == "unknown": + # TODO(wilber): handle the unknown inferShape func. + continue + + for ir_dtype, origin_dtype in zip(ir_dtypes, origin_dtypes): + kernel_func = gen_kernel_func(update_item[3], ctx_name, + origin_dtype) + ir_name = '.'.join( + [it.lower() for it in update_item[:3]]) + "." + ir_dtype + res += f""" + registry->AddKernel("{ir_name}",""" + + res += f""" + std::bind(&KernelLauncherFunc, + KernelLauncher(), + std::placeholders::_1)); +""" + + res += "\n}" + return res + + +def gen_pten_kernel_register_code(resources: List[List[str]], + src_file_path: str): + source_file = open(src_file_path, 'w') + source_file.write(gen_warn_info()) + source_file.write(gen_include_headers()) + namespace = gen_namespace() + source_file.write(namespace[0]) + source_file.write(gen_register_info(resources)) + source_file.write(namespace[1]) + source_file.close() + + if __name__ == "__main__": args = parse_args() infer_meta_data = get_api_yaml_info(args.paddle_root_path) kernel_data = get_kernel_info(args.kernel_info_file) info_meta_wrap_data = get_kernel_info(args.infermeta_wrap_file) out = merge(infer_meta_data, kernel_data, info_meta_wrap_data) - print(json.dumps(out)) + gen_pten_kernel_register_code(out, args.generate_file)