Unverified · Commit dc39eb18 · Authored by: Wilber · Committed by: GitHub

Infrt registers pten kernels (#39588)

* the mlir representation of pten, test=develop

* fixes an error, test=develop

* infrt registers pten kernels
Co-authored-by: Shixiaowei02 <39303645+Shixiaowei02@users.noreply.github.com>
Parent 638aab6e
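Note: with this change, infrt kernels are registered under a `kernel.backend.layout.dtype` key instead of a bare op name; the updated test below therefore looks up `add.cpu.any.fp32` rather than `elementwise_add`. A minimal sketch of how the generator composes that key, mirroring `gen_register_info` in `tools/infrt/get_pten_kernel_info.py` (the record values are a hypothetical example):

```python
# Hypothetical parsed kernel record: kernel name, backend, layout
# (after gen_layout maps ALL_LAYOUT to "any"), then the IR dtype suffix.
record = ["add", "CPU", "any"]
ir_dtype = "fp32"  # IR spelling of the C++ "float" dtype (see gen_dtype)
ir_name = ".".join(it.lower() for it in record) + "." + ir_dtype
assert ir_name == "add.cpu.any.fp32"
```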
@@ -52,5 +52,6 @@ paddle/infrt/dialect/pd_ops_info.h
 .lit_test_times.txt
 paddle/infrt/tests/dialect/Output
 paddle/infrt/tests/lit.cfg.py
+paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.cc
 paddle/fluid/pybind/eager_final_state_op_function_impl.h
 paddle/fluid/pybind/tmp_eager_final_state_op_function_impl.h
@@ -28,13 +28,18 @@
 #include "paddle/infrt/tensor/dense_tensor_view.h"
 #include "paddle/infrt/tensor/tensor_map.h"
 #include "paddle/infrt/tensor/tensor_shape.h"
-#include "paddle/pten/core/meta_tensor.h"

 #ifdef INFRT_WITH_PTEN
 #include "paddle/infrt/backends/host/pten_allocator.h"
 #include "paddle/infrt/backends/host/pten_context.h"
-#include "paddle/pten/backends/cpu/cpu_context.h"
+#include "paddle/pten/backends/all_context.h"
+#include "paddle/pten/common/backend.h"
+#include "paddle/pten/common/data_type.h"
+#include "paddle/pten/common/layout.h"
+#include "paddle/pten/common/scalar.h"
+#include "paddle/pten/common/scalar_array.h"
 #include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/core/meta_tensor.h"
 #endif

 namespace infrt {
@@ -42,31 +47,40 @@ namespace host_context {

 struct MlirFunctionExecutable;

-using ValueVariantType = Variant<int16_t,
-                                 int32_t,
-                                 int64_t,
-                                 float,
-                                 double,
-                                 bool,
-                                 uint32_t,
-                                 uint64_t,
-                                 std::string,
-                                 tensor::TensorShape,
-                                 tensor::DenseHostTensor,
-                                 MlirFunctionExecutable*,
-                                 tensor::TensorMap,
+using ValueVariantType =
+    Variant<int16_t,
+            int32_t,
+            int64_t,
+            float,
+            double,
+            bool,
+            uint32_t,
+            uint64_t,
+            std::string,
+            tensor::TensorShape,
+            tensor::DenseHostTensor,
+            MlirFunctionExecutable*,
+            tensor::TensorMap,
 #ifdef INFRT_WITH_PTEN
-                                 ::pten::MetaTensor,
-                                 ::pten::DenseTensor,
-                                 backends::CpuPtenAllocator,
-                                 backends::CpuPtenContext,
-                                 ::pten::CPUContext,
+            ::pten::MetaTensor,
+            ::pten::DenseTensor,
+            backends::CpuPtenAllocator,
+            backends::CpuPtenContext,
+            ::pten::CPUContext,
+            std::vector<pten::DenseTensor>,
+            paddle::experimental::ScalarBase<pten::DenseTensor>,
+            paddle::experimental::ScalarArrayBase<pten::DenseTensor>,
+            std::vector<pten::MetaTensor>,
+            pten::MetaConfig,
+            paddle::experimental::Backend,
+            paddle::experimental::DataLayout,
+            paddle::experimental::DataType,
 #endif
-                                 std::vector<int16_t>,
-                                 std::vector<int32_t>,
-                                 std::vector<int64_t>,
-                                 std::vector<float>,
-                                 std::vector<double>>;
+            std::vector<int16_t>,
+            std::vector<int32_t>,
+            std::vector<int64_t>,
+            std::vector<float>,
+            std::vector<double>>;

 //! Copy content from \param from to \param to.
 void CopyTo(const Value& from, Value* to);
@@ -11,9 +11,21 @@ gather_srcs(infrt_src SRCS
     allocator_kernels.cc
     )

+set(infrt_register_pten_kernels_gen_source_file ${CMAKE_SOURCE_DIR}/paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.cc)
+set(infrt_register_pten_kernels_gen_file ${CMAKE_SOURCE_DIR}/tools/infrt/get_pten_kernel_function.sh)
+set(wrapped_infermeta_header_file ${CMAKE_SOURCE_DIR}/paddle/pten/infermeta/generated.h)
+set(wrapped_infermeta_source_file ${CMAKE_SOURCE_DIR}/paddle/pten/infermeta/generated.cc)
+
+add_custom_command(
+  OUTPUT ${infrt_register_pten_kernels_gen_source_file}
+  COMMAND sh ${infrt_register_pten_kernels_gen_file}
+  DEPENDS ${wrapped_infermeta_header_file} ${wrapped_infermeta_source_file}
+  COMMENT "infrt generate ${infrt_register_pten_kernels_gen_source_file}"
+  VERBATIM)
+
 cc_library(infrt_naive SRCS infershaped/infershaped_kernel_launcher.cc
     infershaped/infershaped_kernel_launchers.cc
-    )
+    DEPS pten wrapped_infermeta)

 cc_test_tiny(test_infrt_infershape_launchers SRCS
     infershaped/infershape_launchers_test.cc DEPS infrt)
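In effect, the custom command re-runs the shell generator whenever the wrapped infermeta sources change, so `infershaped_kernel_launchers.cc` is rebuilt before `infrt_naive` compiles. A rough Python equivalent of what CMake invokes (the checkout path is a placeholder):

```python
# Manual regeneration, assuming a Paddle checkout at repo_root;
# mirrors the COMMAND line of the add_custom_command above.
import subprocess

repo_root = "."  # hypothetical checkout location
subprocess.check_call(
    ["sh", f"{repo_root}/tools/infrt/get_pten_kernel_function.sh"])
```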
@@ -17,6 +17,10 @@
 #include "paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launcher.h"
 #include "paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.h"
 #include "paddle/infrt/kernel/pten/infershaped/infershaped_utils.h"
+#include "paddle/pten/backends/cpu/cpu_context.h"
+#include "paddle/pten/common/place.h"
+#include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/core/meta_tensor.h"

 namespace infrt {
 namespace kernel {
@@ -33,23 +37,62 @@ TEST(utils, registry) {
   CHECK_EQ(count, 2U);
 }

+class FancyAllocator : public pten::Allocator {
+ public:
+  static void Delete(pten::Allocation* allocation) {
+    ::operator delete(allocation->ptr());
+  }
+
+  AllocationPtr Allocate(size_t bytes_size) override {
+    void* data = ::operator new(bytes_size);
+    auto* allocation = new pten::Allocation(data, bytes_size, pten::CPUPlace());
+    return AllocationPtr(allocation, Delete);
+  }
+};
+
 TEST(ElementwiseAdd, launcher_registry) {
   host_context::KernelRegistry registry;
   RegisterInferShapeLaunchers(&registry);
-  ASSERT_EQ(registry.size(), 1UL);
-  auto creator = registry.GetKernel("elementwise_add");
+  ASSERT_GE(registry.size(), 1UL);
+  auto creator = registry.GetKernel("add.cpu.any.fp32");
+
+  const pten::DDim dims({1, 2});
+  const pten::DataType dtype{pten::DataType::FLOAT32};
+  const pten::DataLayout layout{pten::DataLayout::NHWC};
+  const pten::LoD lod{};
+  pten::DenseTensorMeta meta(dtype, dims, layout, lod);
+
+  auto fancy_allocator = std::unique_ptr<pten::Allocator>(new FancyAllocator);
+  auto* alloc = fancy_allocator.get();

-  ::pten::CPUContext ctx{};
-  ::pten::DenseTensor a{};
-  ::pten::DenseTensor b{};
-  ::pten::DenseTensor c{};
+  pten::DenseTensor a(alloc, meta);
+  pten::DenseTensor b(alloc, meta);
+  pten::DenseTensor c(alloc, meta);
+
+  auto place = pten::CPUPlace();
+  float* a_data = a.mutable_data<float>(place);
+  float* b_data = b.mutable_data<float>(place);
+  float* c_data = c.mutable_data<float>(place);
+  for (size_t i = 0; i < 2; ++i) {
+    a_data[i] = 1.f;
+    b_data[i] = 2.f;
+  }
+
+  pten::CPUContext context;
+  context.SetAllocator(alloc);
+  context.Init();

   host_context::KernelFrameBuilder kernel_frame_builder;
-  kernel_frame_builder.AddArgument(new host_context::Value(std::move(ctx)));
+  kernel_frame_builder.AddArgument(new host_context::Value(std::move(context)));
   kernel_frame_builder.AddArgument(new host_context::Value(std::move(a)));
   kernel_frame_builder.AddArgument(new host_context::Value(std::move(b)));
   kernel_frame_builder.SetResults({new host_context::Value(std::move(c))});
   creator(&kernel_frame_builder);
+
+  for (size_t i = 0; i < 2; ++i) {
+    CHECK_EQ(c_data[i], 3.f);
+  }
 }

 }  // namespace kernel
@@ -13,6 +13,7 @@
 // limitations under the License.

 #include "paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launcher.h"
+#include "paddle/pten/core/dense_tensor.h"

 namespace infrt {
 namespace kernel {

@@ -26,6 +27,9 @@ void InferShapedKernelLauncher::CreateKernelFrameForInferShape(
       values.emplace_back(
           ::pten::MetaTensor{&value->get<::pten::DenseTensor>()});
       infershape_kernel_frame_builder.AddArgument(values.back().get());
+    } else if (value->is_type<pten::DenseTensor>()) {
+      values.emplace_back(pten::MetaTensor{&value->get<pten::DenseTensor>()});
+      infershape_kernel_frame_builder.AddArgument(values.back().get());
     } else {
       infershape_kernel_frame_builder.AddArgument(value);
     }
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.h"
-
-#include "paddle/infrt/kernel/pten/infershaped/elementwise_add.h"
-
-namespace infrt {
-namespace kernel {
-
-void RegisterInferShapeLaunchers(host_context::KernelRegistry* registry) {
-  registry->AddKernel(
-      "elementwise_add",
-      std::bind(&KernelLauncherFunc<decltype(&ElementwiseAdd),
-                                    &ElementwiseAdd,
-                                    decltype(&ElementwiseAddInferShape),
-                                    &ElementwiseAddInferShape>,
-                KernelLauncher<decltype(&ElementwiseAdd),
-                               &ElementwiseAdd,
-                               decltype(&ElementwiseAddInferShape),
-                               &ElementwiseAddInferShape>(),
-                std::placeholders::_1));
-}
-
-}  // namespace kernel
-}  // namespace infrt
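The handwritten launcher registration above goes away in this commit: the same file is now produced at build time by `get_pten_kernel_function.sh` (see the CMake custom command earlier) and is git-ignored. For a non-templated kernel, the generator builds the kernel-function symbol with plain string concatenation; a sketch using a hypothetical `AddKernel` record, following the rules of `gen_kernel_func` in the Python script below:

```python
# Inputs are a hypothetical scraped record; the concatenation mirrors the
# non-templated branch of gen_kernel_func in get_pten_kernel_info.py.
val, ctx_name, dtype_name = "AddKernel", "pten::CPUContext", "float"
kernel_func = "&pten::" + val + "<" + dtype_name + ", " + ctx_name + ">"
assert kernel_func == "&pten::AddKernel<float, pten::CPUContext>"
```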
@@ -16,6 +16,7 @@

 #include <type_traits>
 #include "paddle/infrt/tensor/dense_host_tensor.h"
+#include "paddle/pten/core/dense_tensor.h"

 namespace infrt {
 namespace kernel {
@@ -19,21 +19,9 @@
 #include "paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launcher.h"
 #include "paddle/infrt/kernel/pten/infershaped/infershaped_utils.h"

-// This file contains a example of the infershape ElementwiseAdd kernel.
-// Some of the following code should be generated from PTEN by script.
-
 namespace infrt {
 namespace kernel {

-static void ElementwiseAddInferShape(const ::pten::MetaTensor& a,
-                                     const ::pten::MetaTensor& b,
-                                     ::pten::MetaTensor* c) {}
-
-static void ElementwiseAdd(const ::pten::CPUContext& /*Context*/,
-                           const ::pten::DenseTensor& a,
-                           const ::pten::DenseTensor& b,
-                           ::pten::DenseTensor* c) {}
-
 template <typename KernelFunc,
           KernelFunc kernel,
           typename InferShapedFunc,

@@ -55,7 +43,6 @@ class KernelLauncher : public InferShapedKernelLauncher {
       BuildInferShapeCache(num_input_tensors);
     }
-
     ::infrt::host_context::KernelImpl<KernelFunc, kernel>::Invoke(frame);
   }
 };
@@ -22,7 +22,7 @@
 #include "paddle/infrt/kernel/pten/allocator_kernels.h"
 #include "paddle/infrt/kernel/pten/context_kernels.h"
 #include "paddle/infrt/kernel/pten/dense_tensor_kernels.h"
-#include "paddle/infrt/kernel/pten/infershaped/elementwise_add.h"
+#include "paddle/infrt/kernel/pten/infershaped/pten_kernel_launcher.h"
 #include "paddle/pten/include/infermeta.h"
 #include "paddle/pten/include/kernels.h"
 #include "paddle/pten/kernels/matmul_kernel.h"
@@ -22,14 +22,14 @@ set -e

 #step 1:get kernel registered info
 kernel_register_info_file=`mktemp`
-PADDLE_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}")/../../" && pwd )"
+PADDLE_ROOT="$( cd "$( dirname "$0" )/../../" && pwd )"
 unset GREP_OPTIONS && find ${PADDLE_ROOT}/paddle/pten/kernels -name "*.c*" \
-    | xargs sed -e '/PT_REGISTER_\(GENERAL_\)\?KERNEL(/,/)/!d' \
-    | awk 'BEGIN { RS="{" }{ gsub(/\n /,""); print $0 }' \
-    | grep PT_REGISTER \
-    | awk -F ",|\(" '{gsub(/ /,"");print $2, $3, $4, $5}' \
-    | sort -u | awk '{gsub(/pten::/,"");print $0}' \
-    | grep -v "_grad" > $kernel_register_info_file
+    | xargs sed -e '/PT_REGISTER_\(GENERAL_\)\?KERNEL(/,/)/!d' \
+    | awk 'BEGIN { RS="{" }{ gsub(/\n /,""); print $0 }' \
+    | grep PT_REGISTER \
+    | awk -F ",|\(|\)" '{gsub(/ /,"");$1="";print}' \
+    | sort -u | awk '{gsub(/pten::/,"");gsub(/paddle::platform::/,"");gsub(/dtype::/,"");gsub(/paddle::/,"");print $0}' \
+    | grep -v "_grad" > $kernel_register_info_file

 #step 2:get simple general inferMeta function wrap info
 temp_path=`mktemp -d`

@@ -49,4 +49,5 @@ grep PT_REGISTER_INFER_META_FN ${temp_path}/generate.cc \
 python3 ${PADDLE_ROOT}/tools/infrt/get_pten_kernel_info.py \
     --paddle_root_path ${PADDLE_ROOT} \
     --kernel_info_file $kernel_register_info_file \
-    --infermeta_wrap_file ${temp_path}/wrap_info.txt
+    --infermeta_wrap_file ${temp_path}/wrap_info.txt \
+    --generate_file ${PADDLE_ROOT}/paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.cc
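Step 1 scrapes every `PT_REGISTER_KERNEL`/`PT_REGISTER_GENERAL_KERNEL` invocation into one whitespace-separated record per kernel, which the Python generator then consumes. A sketch of one such record and how it splits (the record content is illustrative, following the format documented in the docstring of `gen_register_info` below):

```python
# Illustrative record scraped from a PT_REGISTER_KERNEL macro:
# kernel, backend, layout, kernel function, then a variadic dtype list.
line = "add CPU ALL_LAYOUT AddKernel float double"
kernel, backend, layout, func, *dtypes = line.split()
assert (kernel, backend, layout) == ("add", "CPU", "ALL_LAYOUT")
assert dtypes == ["float", "double"]
```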
@@ -17,6 +17,7 @@
 import argparse
 import json
 import yaml
+from typing import List, Dict, Any


 def parse_args():
@@ -25,17 +26,23 @@ def parse_args():
         "--paddle_root_path",
         type=str,
         required=True,
-        help="root path of paddle src[WORK_PATH/Paddle] .")
+        help="root path of paddle src[WORK_PATH/Paddle].")
     parser.add_argument(
         "--kernel_info_file",
         type=str,
         required=True,
-        help="kernel info file generated by get_pten_kernel_function.sh .")
+        help="kernel info file generated by get_pten_kernel_function.sh.")
     parser.add_argument(
         "--infermeta_wrap_file",
         type=str,
         required=True,
-        help="inferMeta wrap info file .")
+        help="inferMeta wrap info file.")
+    parser.add_argument(
+        "--generate_file",
+        type=str,
+        required=True,
+        default="../paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.cc",
+        help="generated file.")

     args = parser.parse_args()
     return args
@@ -76,10 +83,179 @@ def merge(infer_meta_data, kernel_data, wrap_data):
     return full_kernel_data


+def gen_warn_info():
+    return """// Generated by tools/infrt/gen_pten_kernel_register.py for infrt.
+// DO NOT edit or include it within paddle.
+"""
+
+
+def gen_include_headers():
+    return """
+#include "paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.h"
+#include "paddle/infrt/kernel/pten/infershaped/pten_kernel_launcher.h"
+#include "paddle/pten/backends/all_context.h"
+#include "paddle/pten/include/kernels.h"
+#include "paddle/pten/include/infermeta.h"
+#include "paddle/pten/infermeta/generated.h"
+"""
+
+
+def gen_namespace():
+    return ("""
+namespace infrt {
+namespace kernel {
+
+""", """
+}  // namespace kernel
+}  // namespace infrt
+""")
+
+
+def gen_context(val):
+    if val == "CPU":
+        return "pten::CPUContext"
+    # elif val == "GPU":
+    #     return "pten::GPUContext"
+    # elif val == "XPU":
+    #     return "pten::XPUContext"
+    else:
+        # raise Exception(f"Unknown context type {val}")
+        return ""
+
+
+def gen_layout(val):
+    if val == "ALL_LAYOUT":
+        return 'any'
+    else:
+        # TODO(wilber): now only process ALL_LAYOUT
+        raise Exception(f"Unknown layout type {val}")
+
+
+def gen_kernel_func(val, ctx_name, dtype_name):
+    if '<' in val and '>' in val:
+        st = val.index('<')
+        ed = val.index('>')
+        func_name = val[:st]
+        template_name = val[st + 1:ed]
+        if 'pten::' in template_name:
+            return "&pten::" + val
+        else:
+            return "&pten::" + func_name + "<pten::" + template_name + ">"
+    else:
+        return "&pten::" + val + "<" + dtype_name + ", " + ctx_name + ">"
+
+
+def gen_dtype(vals: List[str]):
+    ir_dtypes, origin_dtypes = [], []
+    for val in vals:
+        if val == "float":
+            ir_dtypes.append("fp32")
+            origin_dtypes.append("float")
+        elif val == "double":
+            ir_dtypes.append("fp64")
+            origin_dtypes.append("double")
+        elif val == "float16":
+            ir_dtypes.append("fp16")
+            origin_dtypes.append("paddle::experimental::float16")
+        elif val == "bfloat16":
+            ir_dtypes.append("bf16")
+            origin_dtypes.append("paddle::experimental::bfloat16")
+        elif val == "bool":
+            ir_dtypes.append("int1")
+            origin_dtypes.append("bool")
+        elif val == "int8_t":
+            ir_dtypes.append("int8")
+            origin_dtypes.append("int8_t")
+        elif val == "uint8_t":
+            ir_dtypes.append("uint8")
+            origin_dtypes.append("uint8_t")
+        elif val == "int16_t":
+            ir_dtypes.append("int16")
+            origin_dtypes.append("int16_t")
+        elif val == "int" or val == "int32_t":
+            ir_dtypes.append("int32")
+            origin_dtypes.append("int32_t")
+        elif val == "int64_t":
+            ir_dtypes.append("int64")
+            origin_dtypes.append("int64_t")
+        elif val == "complex<float>" or val == "complex64":
+            ir_dtypes.append("complex64")
+            origin_dtypes.append("paddle::experimental::complex64")
+        elif val == "complex<double>" or val == "complex128":
+            ir_dtypes.append("complex128")
+            origin_dtypes.append("paddle::experimental::complex128")
+        elif val == "ALL_DTYPE":
+            ir_dtypes.append("all")
+            origin_dtypes.append("all")
+        else:
+            if "VA_ARGS" in val:
+                continue
+            raise Exception(f"Unknown data type {val}")
+    return ir_dtypes, origin_dtypes
+
+
+# TODO(wilber): Now only process CPUContext.
+def gen_register_info(resources: List[List[str]]):
+    """
+    resources: [['add', 'CPU', 'ALL_LAYOUT', 'AddKernel', 'float', 'double',
+                 '...'(variadic types), 'ElementwiseInferMeta'], ...]
+    """
+    res = "void RegisterInferShapeLaunchers(host_context::KernelRegistry* registry) {"
+
+    for item in resources:
+        # The output string is polluted by C++ macros, here the \ is removed
+        update_item = [v.strip('\\') for v in item]
+
+        ctx_name = gen_context(update_item[1])
+        if (ctx_name == ""):
+            continue
+        update_item[2] = gen_layout(update_item[2])
+        ir_dtypes, origin_dtypes = gen_dtype(update_item[4:-1])
+        infer_shape_func = "&pten::" + update_item[-1]
+
+        if update_item[-1] == "unknown":
+            # TODO(wilber): handle the unknown inferShape func.
+            continue
+
+        for ir_dtype, origin_dtype in zip(ir_dtypes, origin_dtypes):
+            kernel_func = gen_kernel_func(update_item[3], ctx_name,
+                                          origin_dtype)
+            ir_name = '.'.join(
+                [it.lower() for it in update_item[:3]]) + "." + ir_dtype
+            res += f"""
+  registry->AddKernel("{ir_name}","""
+
+            res += f"""
+      std::bind(&KernelLauncherFunc<decltype({kernel_func}),
+                                    {kernel_func},
+                                    decltype({infer_shape_func}),
+                                    {infer_shape_func}>,
+                KernelLauncher<decltype({kernel_func}),
+                               {kernel_func},
+                               decltype({infer_shape_func}),
+                               {infer_shape_func}>(),
+                std::placeholders::_1));
+"""

+    res += "\n}"
+    return res
+
+
+def gen_pten_kernel_register_code(resources: List[List[str]],
+                                  src_file_path: str):
+    source_file = open(src_file_path, 'w')
+    source_file.write(gen_warn_info())
+    source_file.write(gen_include_headers())
+    namespace = gen_namespace()
+    source_file.write(namespace[0])
+    source_file.write(gen_register_info(resources))
+    source_file.write(namespace[1])
+    source_file.close()
+
+
 if __name__ == "__main__":
     args = parse_args()
     infer_meta_data = get_api_yaml_info(args.paddle_root_path)
     kernel_data = get_kernel_info(args.kernel_info_file)
     info_meta_wrap_data = get_kernel_info(args.infermeta_wrap_file)
     out = merge(infer_meta_data, kernel_data, info_meta_wrap_data)
-    print(json.dumps(out))
+    gen_pten_kernel_register_code(out, args.generate_file)
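For reference, the dtype suffixes used in the generated kernel keys follow `gen_dtype` above; a few of the mappings written out (values taken directly from its branches):

```python
# C++ dtype token -> IR dtype suffix, per the branches of gen_dtype.
ir_suffix = {
    "float": "fp32",
    "double": "fp64",
    "bool": "int1",
    "int32_t": "int32",
    "int64_t": "int64",
    "complex<float>": "complex64",
}
assert ir_suffix["float"] == "fp32"  # yields keys like "add.cpu.any.fp32"
```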