Unverified commit 07dad6d6, authored by: H huzhiqiang, committed by: GitHub

[Infrt]add phi kernel dialect (#39726)

Parent 4a4215ff
......@@ -49,6 +49,9 @@ tools/__pycache__
# This file is automatically generated.
# TODO(zhiqiang) Move this file to build directory.
paddle/infrt/dialect/pd_ops.td
paddle/infrt/dialect/phi/ir/phi_cpu_kernels.td
paddle/infrt/dialect/phi/ir/phi_gpu_kernels.td
tools/infrt/kernels.json
paddle/infrt/dialect/pd_ops_info.h
.lit_test_times.txt
paddle/infrt/tests/dialect/Output
......
......@@ -49,24 +49,30 @@ int main(int argc, char **argv) {
if (kernel_signature_map.Has(op_kernel_pair.first)) {
std::cout << "\"" << op_kernel_pair.first << "\":{";
auto &args = kernel_signature_map.Get(op_kernel_pair.first).args;
std::cout << "\"inputs\":[";
for (auto name : std::get<0>(args)) {
std::cout << "\"" << name << "\",";
auto inputs_ = std::get<0>(args);
if (inputs_.size() > 0) std::cout << "\"" << inputs_[0] << "\"";
for (size_t i = 1; i < inputs_.size(); i++) {
std::cout << ",\"" << inputs_[i] << "\"";
}
if (std::get<0>(args).size() > 0) std::cout << "\b";
std::cout << "],\"attrs\":[";
for (auto name : std::get<1>(args)) {
std::cout << "\"" << name << "\",";
auto attrs_ = std::get<1>(args);
if (attrs_.size() > 0) std::cout << "\"" << attrs_[0] << "\"";
for (size_t i = 1; i < attrs_.size(); i++) {
std::cout << ",\"" << attrs_[i] << "\"";
}
if (std::get<1>(args).size() > 0) std::cout << "\b";
std::cout << "],\"outputs\":[";
for (auto name : std::get<2>(args)) {
std::cout << "\"" << name << "\",";
auto outputs_ = std::get<2>(args);
if (outputs_.size() > 0) std::cout << "\"" << outputs_[0] << "\"";
for (size_t i = 1; i < outputs_.size(); i++) {
std::cout << ",\"" << outputs_[i] << "\"";
}
if (std::get<2>(args).size() > 0) std::cout << "\b";
std::cout << "]},";
}
}
std::cout << "\b}" << std::endl;
std::cout << "}" << std::endl;
return 0;
}
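The rewrite above replaces the old trailing-comma-plus-"\b" backspace trick, which only looked right on a terminal and corrupted redirected output, with the usual "print the first element, then prefix each remaining element with a comma" pattern. For reference, a minimal Python sketch of the JSON shape this tool emits per op (the op name and signature fields here are hypothetical):

import json

# Hypothetical signature entry, mirroring the inputs/attrs/outputs loops above.
signature = {"inputs": ["X", "Y"], "attrs": ["axis"], "outputs": ["Out"]}
print(json.dumps({"matmul": signature}))
# {"matmul": {"inputs": ["X", "Y"], "attrs": ["axis"], "outputs": ["Out"]}}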
......@@ -21,8 +21,22 @@
namespace infrt {
enum class TargetType : uint8_t { CPU, GPU, UNK };
enum class PrecisionType : uint8_t { FLOAT32, FLOAT16, UNK };
enum class LayoutType : uint8_t { NCHW, NHWC, UNK };
enum class LayoutType : uint8_t { NCHW, NHWC, ANY, UNK };
enum class PrecisionType : uint8_t {
UINT8,
INT8,
INT16,
INT32,
INT64,
FLOAT16,
BFLOAT16,
FLOAT32,
FLOAT64,
COMPLEX64,
COMPLEX128,
BOOL,
UNK
};
struct Place {
TargetType target;
......
......@@ -34,9 +34,10 @@ def DenseTensor : Infrt_Type<"DenseTensor"> {
let summary = "infrt dense tensor";
let description = [{dense_tensor<, 3>}];
let parameters = (ins
"TargetType":$target,
"PrecisionType":$precision,
"LayoutType":$layout
"::infrt::TargetType":$target,
"::infrt::PrecisionType":$precision,
"::infrt::LayoutType":$layout
);
}
......
......@@ -23,6 +23,8 @@
#include "paddle/infrt/dialect/pd_ops.h"
#include "paddle/infrt/dialect/phi/ir/infrt_phi_tensor.h"
#include "paddle/infrt/dialect/phi/ir/phi_base.h"
#include "paddle/infrt/dialect/phi/ir/phi_kernels.h"
#include "paddle/infrt/dialect/tensor_shape.h"
namespace infrt {
......@@ -34,6 +36,8 @@ void registerCinnDialects(mlir::DialectRegistry &registry) { // NOLINT
mlir::pd::PaddleDialect,
#ifdef INFRT_WITH_PHI
phi::PHIDenseTensorDialect,
phi::PHICPUKernelDialect,
phi::PHIGPUKernelDialect,
phi::PHIDialect
#endif
>();
......
#mlir_tablegen_on(infrt_phi_base DIALECT phi)
add_mlir_dialect(infrt_phi_base phi)
add_mlir_dialect(infrt_phi_tensor phi_dt)
add_mlir_dialect(infrt_phi_kernel phi_kernel)
add_mlir_dialect(phi_cpu_kernels phi_cpu)
add_mlir_dialect(phi_gpu_kernels phi_gpu)
#mlir_tablegen_on(infrt_phi_tensor)
gather_srcs(infrt_src SRCS
phi_base.cc
infrt_phi_tensor.cc)
infrt_phi_tensor.cc
phi_kernels.cc)
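Each add_mlir_dialect(...) line runs mlir-tablegen over the matching .td file at build time, generating the phi_cpu_kernels*.inc and phi_gpu_kernels*.inc headers that the new phi_kernels.h and phi_kernels.cc below pull in.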
......@@ -6,24 +6,32 @@ include "mlir/IR/OpBase.td"
include "paddle/infrt/dialect/infrt_base.td"
include "paddle/infrt/dialect/phi/ir/infrt_phi_base.td"
def PHI_KernelDialect : Dialect {
let name = "phi_kernel";
def PHI_CPUKernelDialect : Dialect {
let name = "phi_cpu";
let description = [{
The PHI Kernel dialect.
The PHI CPU Kernel dialect.
}];
let cppNamespace = "::infrt::phi";
}
def PHI_GPUKernelDialect : Dialect {
let name = "phi_gpu";
let description = [{
The PHI GPU Kernel dialect.
}];
let cppNamespace = "::infrt::phi";
}
// PHI Kernel related ops.
class PDT_Kernel<string mnemonic, list<OpTrait> traits = []> : Op<PHI_KernelDialect, mnemonic, !listconcat(traits, [PhiOpTrait, IsolatedFromAbove])> {
class PDTCPU_Kernel<string mnemonic, list<OpTrait> traits = []> : Op<PHI_CPUKernelDialect, mnemonic, !listconcat(traits, [PhiOpTrait, IsolatedFromAbove])> {
}
def PDCK_AbsOp : PDT_Kernel<"phi.abs.host.fp32"> {
let arguments = (ins CPU_Context:$dev_ctx, DenseTensor:$x);
let results = (outs DenseTensor:$output);
// PHI Kernel related ops.
class PDTGPU_Kernel<string mnemonic, list<OpTrait> traits = []> : Op<PHI_GPUKernelDialect, mnemonic, !listconcat(traits, [PhiOpTrait, IsolatedFromAbove])> {
}
#endif
......@@ -34,6 +34,14 @@ class FillDenseTensorOp<Attr attr_type, string dtype> :
attr_type:$value
);
let results = (outs);
let assemblyFormat = "`(` $input `:` type($input) `)` attr-dict";
}
class PrintDenseTensorOp:
PDT_Op<"print_tensor"> {
let arguments = (ins DenseTensor:$input);
let results = (outs);
let assemblyFormat = "`(` $input `:` type($input) `)` attr-dict";
}
class CreateCPUAllocatorOp
......@@ -44,7 +52,7 @@ class CreateCPUAllocatorOp
class CreateCPUContextOp
: PDT_Op<"create_context." # "cpu", [NoSideEffect]> {
let arguments = (ins);
let arguments = (ins CPU_Allocator:$input);
let results = (outs CPU_Context:$output);
}
......@@ -52,6 +60,7 @@ def PDT_CreateDenseTensorOp_cpu_f32_nchw : CreateDenseTensorOp<"cpu", "f32", "nc
def PDT_FillDenseTensorOp_f32 : FillDenseTensorOp<F32ArrayAttr, "f32">;
def PDT_CreateAllocatorOp_cpu : CreateCPUAllocatorOp;
def PDT_CreateContextOp_cpu : CreateCPUContextOp;
def PDT_PrintDenseTensor_cpu : PrintDenseTensorOp;
def FakeKernelOp : PDT_Op<"fake_phi_kernel"> {
let arguments = (ins CPU_Context:$dev_ctx, DenseTensor:$x, DenseTensor:$y, BoolAttr:$transpose_x, BoolAttr:$transpose_y);
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/dialect/phi/ir/phi_kernels.h"
#include <mlir/IR/BuiltinTypes.h>
#include "paddle/infrt/dialect/phi/ir/phi_gpu_kernelsDialect.cpp.inc"
#define GET_OP_CLASSES
#include "paddle/infrt/dialect/phi/ir/phi_cpu_kernels.cpp.inc" // NOLINT
#include "paddle/infrt/dialect/phi/ir/phi_cpu_kernelsDialect.cpp.inc"
#define GET_OP_CLASSES
#include "paddle/infrt/dialect/phi/ir/phi_gpu_kernels.cpp.inc" // NOLINT
namespace infrt {
namespace phi {
void PHICPUKernelDialect::initialize() {
#define GET_OP_LIST
addOperations<
#include "paddle/infrt/dialect/phi/ir/phi_cpu_kernels.cpp.inc" // NOLINT
>();
}
void PHIGPUKernelDialect::initialize() {
#define GET_OP_LIST
addOperations<
#include "paddle/infrt/dialect/phi/ir/phi_gpu_kernels.cpp.inc" // NOLINT
>();
}
} // namespace phi
} // namespace infrt
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <mlir/Dialect/Traits.h>
#include <mlir/IR/Attributes.h>
#include <mlir/IR/Builders.h>
#include <mlir/IR/BuiltinOps.h>
#include <mlir/IR/BuiltinTypes.h>
#include <mlir/IR/Dialect.h>
#include <mlir/IR/Matchers.h>
#include <mlir/IR/OpImplementation.h>
#include <mlir/IR/TypeUtilities.h>
#include <mlir/Interfaces/CallInterfaces.h>
#include <mlir/Interfaces/DerivedAttributeOpInterface.h>
#include <mlir/Interfaces/InferTypeOpInterface.h>
#include <mlir/Interfaces/LoopLikeInterface.h>
#include <mlir/Interfaces/SideEffectInterfaces.h>
#include "paddle/infrt/dialect/dense_tensor.h"
#include "paddle/infrt/dialect/infrt/infrt_dialect.h"
#include "paddle/infrt/dialect/phi/ir/phi_base.h"
#include "paddle/infrt/dialect/phi/ir/phi_cpu_kernelsDialect.h.inc"
#define GET_OP_CLASSES
#include "paddle/infrt/dialect/phi/ir/phi_cpu_kernels.h.inc"
#include "paddle/infrt/dialect/phi/ir/phi_gpu_kernelsDialect.h.inc"
#define GET_OP_CLASSES
#include "paddle/infrt/dialect/phi/ir/phi_gpu_kernels.h.inc"
......@@ -41,26 +41,49 @@ TargetType cvtTargetFromPhi(phi::Backend backend) {
}
phi::DataType cvtPrecision2Phi(PrecisionType precision) {
#define CONVERT_PRECISION_TO_PHI(Precision) \
case PrecisionType::Precision: \
return phi::DataType::Precision;
switch (precision) {
case PrecisionType::FLOAT32:
return phi::DataType::FLOAT32;
break;
case PrecisionType::FLOAT16:
return phi::DataType::FLOAT16;
CONVERT_PRECISION_TO_PHI(FLOAT32)
CONVERT_PRECISION_TO_PHI(FLOAT16)
CONVERT_PRECISION_TO_PHI(FLOAT64)
CONVERT_PRECISION_TO_PHI(UINT8)
CONVERT_PRECISION_TO_PHI(INT8)
CONVERT_PRECISION_TO_PHI(INT16)
CONVERT_PRECISION_TO_PHI(INT32)
CONVERT_PRECISION_TO_PHI(INT64)
CONVERT_PRECISION_TO_PHI(COMPLEX64)
CONVERT_PRECISION_TO_PHI(COMPLEX128)
CONVERT_PRECISION_TO_PHI(BOOL)
default:
return phi::DataType::UNDEFINED;
}
#undef CONVERT_PRECISION_TO_PHI
}
PrecisionType cvtPrecisionFromPhi(phi::DataType datatype) {
#define CONVERT_PRECISION_FROM_PHI(Precision) \
case phi::DataType::Precision: \
return PrecisionType::Precision;
switch (datatype) {
case phi::DataType::FLOAT32:
return PrecisionType::FLOAT32;
case phi::DataType::FLOAT16:
return PrecisionType::FLOAT16;
CONVERT_PRECISION_FROM_PHI(FLOAT32)
CONVERT_PRECISION_FROM_PHI(FLOAT16)
CONVERT_PRECISION_FROM_PHI(FLOAT64)
CONVERT_PRECISION_FROM_PHI(UINT8)
CONVERT_PRECISION_FROM_PHI(INT8)
CONVERT_PRECISION_FROM_PHI(INT16)
CONVERT_PRECISION_FROM_PHI(INT32)
CONVERT_PRECISION_FROM_PHI(INT64)
CONVERT_PRECISION_FROM_PHI(COMPLEX64)
CONVERT_PRECISION_FROM_PHI(COMPLEX128)
CONVERT_PRECISION_FROM_PHI(BOOL)
default:
return PrecisionType::UNK;
}
#undef CONVERT_PRECISION_FROM_PHI
}
phi::DataLayout cvtLayout2Phi(LayoutType layout) {
......@@ -69,6 +92,8 @@ phi::DataLayout cvtLayout2Phi(LayoutType layout) {
return phi::DataLayout::NCHW;
case LayoutType::NHWC:
return phi::DataLayout::NHWC;
case LayoutType::ANY:
return phi::DataLayout::ANY;
default:
return phi::DataLayout::UNDEFINED;
}
......@@ -80,6 +105,8 @@ LayoutType cvtLayoutFromPhi(phi::DataLayout layout) {
return LayoutType::NCHW;
case phi::DataLayout::NHWC:
return LayoutType::NHWC;
case phi::DataLayout::ANY:
return LayoutType::ANY;
default:
return LayoutType::UNK;
}
......
......@@ -29,6 +29,7 @@
#include "paddle/infrt/kernel/tensor_shape_kernels.h"
#include "paddle/infrt/kernel/test_kernels.h"
#ifdef INFRT_WITH_PHI
#include "paddle/infrt/kernel/phi/infershaped/infershaped_kernel_launchers.h"
#include "paddle/infrt/kernel/phi/registry.h"
#endif
......@@ -58,6 +59,7 @@ int main(int argc, char** argv) {
kernel::RegisterControlFlowKernels(&registry);
#ifdef INFRT_WITH_PHI
kernel::RegisterPhiKernels(&registry);
kernel::RegisterInferShapeLaunchers(&registry);
#endif
// load extra shared library
......
......@@ -18,7 +18,13 @@ namespace infrt {
namespace kernel {
namespace phi {
::phi::CPUContext CreateCpuContext() { return {}; }
::phi::CPUContext CreateCpuContext(
infrt::backends::CpuPhiAllocator* allocator) {
::phi::CPUContext context;
context.SetAllocator(allocator);
context.Init();
return context;
}
} // namespace phi
} // namespace kernel
......
......@@ -14,6 +14,7 @@
#pragma once
#include "paddle/infrt/backends/host/phi_allocator.h"
#include "paddle/infrt/backends/host/phi_context.h"
#include "paddle/phi/core/dense_tensor.h"
......@@ -21,7 +22,7 @@ namespace infrt {
namespace kernel {
namespace phi {
::phi::CPUContext CreateCpuContext();
::phi::CPUContext CreateCpuContext(::infrt::backends::CpuPhiAllocator*);
} // namespace phi
} // namespace kernel
......
......@@ -13,7 +13,7 @@
// limitations under the License.
#include "paddle/infrt/kernel/phi/dense_tensor_kernels.h"
#include <iostream>
namespace infrt {
namespace kernel {
namespace phi {
......@@ -30,8 +30,38 @@ namespace phi {
}
void FillDenseTensorF32(::phi::DenseTensor* dense_tensor,
host_context::Attribute<std::vector<int64_t>> values) {}
host_context::Attribute<std::vector<float>> values) {
auto place = ::phi::CPUPlace();
float* a_data = dense_tensor->mutable_data<float>(place);
for (int64_t i = 0; i < dense_tensor->numel(); ++i) {
a_data[i] = (values.get())[i];
}
}
void PrintDenseTensor(::phi::DenseTensor* dense_tensor) {
#define PRINT_META_DATA(PHI_DATATYPE, DTYPE) \
case ::phi::DataType::PHI_DATATYPE: { \
DTYPE* data = dense_tensor->data<DTYPE>(); \
if (dense_tensor->numel() == 0) break; \
std::cout << data[0]; \
for (int64_t i = 1; i < dense_tensor->numel(); i++) { \
std::cout << "," << data[i]; \
} \
break; \
}
::phi::DDim dims = dense_tensor->dims();
std::cout << "dense_tensor: shape=shape" << dims.to_str() << ","
<< " values=[";
switch (dense_tensor->dtype()) {
PRINT_META_DATA(FLOAT32, float);
PRINT_META_DATA(INT32, int32_t);
default:
std::cout << "Error! Unsupported data type!\n";
}
std::cout << "]\n";
#undef PRINT_META_DATA
}
} // namespace phi
} // namespace kernel
} // namespace infrt
......@@ -28,7 +28,8 @@ namespace phi {
host_context::Attribute<std::vector<int64_t>> lod);
void FillDenseTensorF32(::phi::DenseTensor* dense_tensor,
host_context::Attribute<std::vector<int64_t>> values);
host_context::Attribute<std::vector<float>> values);
void PrintDenseTensor(::phi::DenseTensor* dense_tensor);
} // namespace phi
} // namespace kernel
......
......@@ -54,7 +54,7 @@ TEST(ElementwiseAdd, launcher_registry) {
host_context::KernelRegistry registry;
RegisterInferShapeLaunchers(&registry);
ASSERT_GE(registry.size(), 1UL);
auto creator = registry.GetKernel("pten.add.cpu.any.fp32");
auto creator = registry.GetKernel("phi_cpu.add.any.float32");
const phi::DDim dims({1, 2});
const phi::DataType dtype{phi::DataType::FLOAT32};
......
......@@ -42,6 +42,8 @@ void RegisterPhiKernels(host_context::KernelRegistry* registry) {
INFRT_KERNEL(infrt::kernel::phi::CreateDenseTensorCpuF32Nchw));
registry->AddKernel("phi_dt.fill_dense_tensor.f32",
INFRT_KERNEL(infrt::kernel::phi::FillDenseTensorF32));
registry->AddKernel("phi_dt.print_tensor",
INFRT_KERNEL(infrt::kernel::phi::PrintDenseTensor));
registry->AddKernel(
"phi_dt.fake_phi_kernel",
std::bind(&KernelLauncherFunc<decltype(&FakePhiKernel),
......
// RUN: infrtexec -i %s | FileCheck %s
// CHECK-LABEL: @fake_phi_kernel_execute
func @fake_phi_kernel_execute() {
// CHECK-LABEL: @sign_any_float32_execute
func @sign_any_float32_execute() {
%allocator = "phi_dt.create_allocator.cpu" (): () -> !phi.CPU_allocator
%ctx = "phi_dt.create_context.cpu" (): () -> !phi.CPU_context
%ctx = "phi_dt.create_context.cpu" (%allocator): (!phi.CPU_allocator) -> !phi.CPU_context
%t = "phi_dt.create_dense_tensor.cpu.f32.nchw" (%allocator) {dims=[1:i64], lod=[1:i64]}: (!phi.CPU_allocator) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
"phi_dt.fill_dense_tensor.f32"(%t) {value=[3.8:f32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
%e = "phi_cpu.sign.any.float32"(%ctx, %t) : (!phi.CPU_context, !infrt.dense_tensor<CPU, FP32, NCHW>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
// CHECK: @FakePhiKernel@
%d = "phi_dt.fake_phi_kernel" (%ctx, %t, %t) {transpose_x=false, transpose_y=false} : (!phi.CPU_context, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
// CHECK: dense_tensor: shape=shape[1], values=[1]
"phi_dt.print_tensor" (%e) : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
Infrt.return
}
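The rewritten test exercises the new pieces end to end: create an allocator, build a CPU context from it (the new CreateCPUContextOp signature), create and fill a dense tensor, run the generated phi_cpu.sign.any.float32 kernel, and print the result; sign(3.8) is 1, which is what the CHECK line expects.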
......@@ -33,14 +33,16 @@ function update_pd_ops() {
rm -rf ${PADDLE_ROOT}/build && mkdir -p ${PADDLE_ROOT}/build
cd ${PADDLE_ROOT}/build
cmake .. -DWITH_PYTHON=ON -DWITH_GPU=OFF -DPYTHON_EXECUTABLE=`which python3` -DWITH_XBYAK=OFF -DWITH_NCCL=OFF -DWITH_RCCL=OFF -DWITH_CRYPTO=OFF
make -j8 paddle_python
make -j8 paddle_python print_pten_kernels
cd ${PADDLE_ROOT}/build
./paddle/phi/tools/print_pten_kernels > ../tools/infrt/kernels.json
cd python/dist/
python3 -m pip uninstall -y paddlepaddle
python3 -m pip install *whl
# update pd_ops.td
cd ${PADDLE_ROOT}/tools/infrt/
python3 generate_pd_op_dialect_from_paddle_op_maker.py
python3 generate_phi_kernel_dialect.py ./kernels.json
}
function init() {
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import sys
attr_type_converter = {"i": 'SI32Attr', "b": 'BoolAttr', "l": 'SI64Attr'}
supported_kernels = ['sign', 'dot', 'digamma', 'conj']
target_type_converter = {"CPU": "CPU", "GPU": "GPU"}
layout_type_converter = {
"NCHW": "NCHW",
"NHWC": "NHWC",
"Undefined(AnyLayout)": "ANY"
}
precision_type_converter = {
"uint8": "UINT8",
"int8": "INT8",
"int16": "INT16",
"int32": "INT32",
"int64": "INT64",
"float16": "FLOAT16",
"bfloat16": "BFLOAT16",
"float32": "FLOAT32",
"float64": "FLOAT64",
"complex64": "COMPLEX64",
"complex128": "COMPLEX128",
"bool": "BOOL"
}
def generate_kernel_name(op_name, place_str):
[target_, layout_, precision_] = place_str[1:-1].split(',')
target_ = target_type_converter[target_.strip()]
layout_ = layout_type_converter[layout_.strip()]
precision_ = precision_type_converter[precision_.strip()]
alias_ = "{}.{}".format(op_name, ".".join(
[target_.strip(), layout_.strip(), precision_.strip()]))
return alias_
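A quick usage sketch for the helper above, assuming a place string in the registry's "(target, layout, precision)" form and the converter tables defined earlier in this file:

# Hypothetical invocation; the place string format follows kernels.json.
print(generate_kernel_name("sign", "(CPU, Undefined(AnyLayout), float32)"))
# sign.CPU.ANY.FLOAT32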
def generate_attrs_info(op_name, attrs_info):
kernel_attrs_names = {
'split': ['sections', 'num', 'axis', 'mkldnn_data_type'],
'sign': [],
'masked_select': [],
'trace': ['offset', 'axis1', 'axis2'],
'concat': ['axis'],
'empty': ['shape', 'dtype'],
'conj': [],
'norm': ['axis', 'epsilon', 'is_test'],
'histogram': ['bins', 'min', 'max'],
'dot': [],
'scale': ['scale', 'bias', 'bias_after_scale'],
'digamma': [],
'lerp': [],
'cast': ['out_dtype', 'in_dtype'],
'abs': []
}
attrs_args_ = ""
if len(kernel_attrs_names[op_name]) == len(attrs_info):
for index in range(len(attrs_info)):
attr_name = kernel_attrs_names[op_name][index]
attr_type = attr_type_converter[attrs_info[index]]
attrs_args_ += '{type_}:${name_},'.format(
type_=attr_type, name_=attr_name)
return attrs_args_[:-1]
def generate_inputs_info(input_info):
input_args_ = ""
for index in range(len(input_info)):
[target_, layout_, precision_] = input_info[index].split(',')
# TODO: check validity
target_ = target_type_converter[target_.strip()]
layout_ = layout_type_converter[layout_.strip()]
precision_ = precision_type_converter[precision_.strip()]
input_args_ += " DenseTensor<\"{}\",\"{}\",\"{}\">:$in{},".format(
target_.strip(), precision_.strip(), layout_.strip(), str(index))
input_args_ = input_args_[:-1]
return input_args_
def generate_arguments_info(op_name, input_info, attr_info):
input_args = generate_inputs_info(input_info)
attr_args = generate_attrs_info(op_name, attr_info)
context_args = "CPU_Context:$dev_ctx"
argument_ = "{},{},{}".format(context_args, input_args, attr_args)
return (("let arguments = (ins {});".format(argument_.strip(","))))
def generate_results_info(output_info):
output_args_ = "let results = (outs "
for index in range(len(output_info)):
[target_, layout_, precision_] = output_info[index].split(',')
# TODO: check validity
target_ = target_type_converter[target_.strip()]
layout_ = layout_type_converter[layout_.strip()]
precision_ = precision_type_converter[precision_.strip()]
output_args_ += " DenseTensor<\"{}\",\"{}\",\"{}\">:$out{},".format(
target_.strip(), precision_.strip(), layout_.strip(), str(index))
return ("{});".format(output_args_[:-1]))
def generate_supported_kernel_list(load_dict):
supported_kernels_list_ = []
for op_name in load_dict:
kernel_list = load_dict[op_name]
for kernel_info in kernel_list:
for kernel_alias_ in kernel_info:
attributes = kernel_info[kernel_alias_]["attribute"]
flag = True
for attribute in attributes:
if attribute not in attr_type_converter:
flag = False
if flag:
supported_kernels_list_.append(op_name)
alias_ = generate_kernel_dialect(op_name, kernel_alias_,
kernel_info[kernel_alias_])
supported_kernels_list_ = list(set(supported_kernels_list_))
print(supported_kernels_list_)
def scan_kernel_info(load_dict):
target_type_ = []
layout_type_ = []
precision_type_ = []
for op_name in load_dict:
kernel_list = load_dict[op_name]
for kernel_info in kernel_list:
for kernel_alias_ in kernel_info:
[target_, layout_, precision_] = kernel_alias_[1:-1].split(',')
target_type_.append(target_.strip())
layout_type_.append(layout_.strip())
precision_type_.append(precision_.strip())
target_type_ = list(set(target_type_))
layout_type_ = list(set(layout_type_))
precision_type_ = list(set(precision_type_))
print(target_type_)
print(layout_type_)
print(precision_type_)
def generate_cpu_kernel_dialect(op_name, kernel_alias_, kernel_info):
alias = generate_kernel_name(op_name, kernel_alias_)
summary = 'let summary = "{name}";'.format(name=alias)
dialect_name = alias.split(".")
dialect_name = dialect_name[0] + "." + dialect_name[2] + "." + dialect_name[
3]
header = 'def {kernel_name} : PDTCPU_Kernel<"{name}",[NoSideEffect]> {left_brace}'.format(
kernel_name=alias.replace(".", ""),
name=dialect_name.lower(),
left_brace="{")
inputs_ = kernel_info["input"]
attributes = kernel_info["attribute"]
arguments = generate_arguments_info(op_name, inputs_, attributes)
outputs = kernel_info["output"]
results = generate_results_info(outputs)
kernel_dialect = '{header_}\n {summary_}\n {arguments_}\n {results_}\n{right_brace}\n'.format(
header_=header,
summary_=summary,
arguments_=arguments,
results_=results,
right_brace="}")
return kernel_dialect
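The index juggling on dialect_name drops the target segment from the op mnemonic, since the target is already carried by the phi_cpu/phi_gpu dialect prefix, while the TableGen def name keeps all segments with the dots stripped. A sketch of both transformations for one alias:

alias = "sign.CPU.ANY.FLOAT32"
parts = alias.split(".")
print((parts[0] + "." + parts[2] + "." + parts[3]).lower())  # sign.any.float32
print(alias.replace(".", ""))  # signCPUANYFLOAT32, used as the def name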
def generate_gpu_kernel_dialect(op_name, kernel_alias_, kernel_info):
alias = generate_kernel_name(op_name, kernel_alias_)
summary = 'let summary = "{name}";'.format(name=alias)
dialect_name = alias.split(".")
dialect_name = dialect_name[0] + "." + dialect_name[2] + "." + dialect_name[
3]
header = 'def {kernel_name} : PDTGPU_Kernel<"{name}",[NoSideEffect]> {left_brace}'.format(
kernel_name=alias.replace(".", ""),
name=dialect_name.lower(),
left_brace="{")
inputs_ = kernel_info["input"]
attributes = kernel_info["attribute"]
arguments = generate_arguments_info(op_name, inputs_, attributes)
outputs = kernel_info["output"]
results = generate_results_info(outputs)
kernel_dialect = '{header_}\n {summary_}\n {arguments_}\n {results_}\n{right_brace}\n'.format(
header_=header,
summary_=summary,
arguments_=arguments,
results_=results,
right_brace="}")
return kernel_dialect
def generate_dialect_head():
comment_ = "/*===- TableGen'source file -----------------------------------------------===*\\\n\
|* *|\n\
|* Kernel Definitions *|\n\
|* *|\n\
|* Automatically generated file, do not edit! *|\n\
|* Generated by tools/infrt/generate_phi_kernel_dialect.py *|\
|* *|\n\
\*===----------------------------------------------------------------------===*/\n"
includes_ = "#ifndef PTEN_KERNELS\n\
#define PTEN_KERNELS\n\
include \"mlir/Interfaces/InferTypeOpInterface.td\"\n\
include \"mlir/Interfaces/LoopLikeInterface.td\"\n\
include \"mlir/IR/OpBase.td\"\n\
include \"paddle/infrt/dialect/phi/ir/infrt_phi_kernel.td\""
return (comment_ + includes_)
def get_kernel_target(kernel_alias_):
target = kernel_alias_[1:-1].split(",")
return target[0]
def main(path_):
with open(path_, "r") as f:
load_dict = json.load(f)
head = generate_dialect_head()
cpu_registry_ = ""
gpu_registry_ = ""
for op_name in load_dict:
if op_name not in supported_kernels:
continue
kernel_list = load_dict[op_name]
for kernel_info in kernel_list:
for kernel_alias_ in kernel_info:
if get_kernel_target(kernel_alias_) == "CPU":
kernel_registry = generate_cpu_kernel_dialect(
op_name, kernel_alias_, kernel_info[kernel_alias_])
cpu_registry_ += kernel_registry
elif get_kernel_target(kernel_alias_) == "GPU":
kernel_registry = generate_gpu_kernel_dialect(
op_name, kernel_alias_, kernel_info[kernel_alias_])
gpu_registry_ += kernel_registry
else:
print("Unsupported backend:" + get_kernel_target(
kernel_alias_))
end = "#endif // PTEN_KERNELS"
with open("../../paddle/infrt/dialect/phi/ir/phi_cpu_kernels.td",
"w") as dst:
dst.write('{start_}\n{dialect_}\n{end_}'.format(
start_=head, dialect_=cpu_registry_, end_=end))
with open("../../paddle/infrt/dialect/phi/ir/phi_gpu_kernels.td",
"w") as dst:
dst.write('{start_}\n{dialect_}\n{end_}'.format(
start_=head, dialect_=gpu_registry_, end_=end))
if __name__ == '__main__':
path = sys.argv[1]
main(path)
......@@ -150,19 +150,19 @@ def gen_dtype(vals: List[str]):
ir_dtypes, origin_dtypes = [], []
for val in vals:
if val == "float":
ir_dtypes.append("fp32")
ir_dtypes.append("float32")
origin_dtypes.append("float")
elif val == "double":
ir_dtypes.append("fp64")
ir_dtypes.append("float64")
origin_dtypes.append("double")
elif val == "float16":
ir_dtypes.append("fp16")
ir_dtypes.append("float16")
origin_dtypes.append("paddle::experimental::float16")
elif val == "bfloat16":
ir_dtypes.append("bf16")
origin_dtypes.append("paddle::experimental::bfloat16")
elif val == "bool":
ir_dtypes.append("int1")
ir_dtypes.append("bool")
origin_dtypes.append("bool")
elif val == "int8_t":
ir_dtypes.append("int8")
......@@ -219,8 +219,8 @@ def gen_register_info(resources: List[List[str]]):
for ir_dtype, origin_dtype in zip(ir_dtypes, origin_dtypes):
kernel_func = gen_kernel_func(update_item[3], ctx_name,
origin_dtype)
ir_name = 'pten.' + '.'.join(
[it.lower() for it in update_item[:3]]) + "." + ir_dtype
ir_name = 'phi_cpu.' + update_item[0].lower() + '.' + update_item[
2].lower() + '.' + ir_dtype
res += f"""
registry->AddKernel("{ir_name}","""
......