未验证 提交 65478332 编写于 作者: 王明冬 提交者: GitHub

[infrt] add phi_dt.create_inited_dense_tensor.cpu.f32 kernel. (#40902)

上级 cfadf61b
...@@ -28,6 +28,13 @@ class CreateDenseTensorOp<string target> ...@@ -28,6 +28,13 @@ class CreateDenseTensorOp<string target>
let results = (outs DenseTensor:$output); let results = (outs DenseTensor:$output);
} }
def CreateInitedCpuFLOAT32DenseTensorOp
: PDT_Op<"create_inited_dense_tensor.cpu.f32", [NoSideEffect]> {
let arguments = (ins Context:$context, I64ArrayAttr:$dims,
LayoutAttr:$layout, I64ArrayAttr:$lod, F32Attr:$value);
let results = (outs DenseTensor:$output);
}
class FillDenseTensorOp<Attr attr_type, string dtype> : class FillDenseTensorOp<Attr attr_type, string dtype> :
PDT_Op<"fill_dense_tensor." # dtype> { PDT_Op<"fill_dense_tensor." # dtype> {
let arguments = (ins let arguments = (ins
......
...@@ -21,7 +21,10 @@ namespace phi { ...@@ -21,7 +21,10 @@ namespace phi {
::phi::CPUContext CreateCPUContext() { ::phi::CPUContext CreateCPUContext() {
::phi::CPUContext ctx{}; ::phi::CPUContext ctx{};
ctx.Init(); ctx.Init();
ctx.SetAllocator(new backends::CpuPhiAllocator{}); auto allocator = new backends::CpuPhiAllocator{};
ctx.SetAllocator(allocator);
ctx.SetHostAllocator(allocator);
ctx.SetZeroAllocator(allocator);
return ctx; return ctx;
} }
......
...@@ -56,6 +56,26 @@ namespace phi { ...@@ -56,6 +56,26 @@ namespace phi {
{})); {}));
} }
::phi::DenseTensor CreateInitedDenseTensorF32(
const ::phi::CPUContext& context,
host_context::Attribute<std::vector<int64_t>> dims,
host_context::Attribute<std::vector<int64_t>> lod,
host_context::Attribute<::infrt::LayoutType> layout,
host_context::Attribute<float> value) {
::phi::DenseTensor dense_tensor(
const_cast<::phi::Allocator*>(&context.GetAllocator()),
::phi::DenseTensorMeta(
ConvertPrecisionToPhi(::infrt::PrecisionType::FLOAT32),
::phi::make_ddim(dims.get()),
ConvertLayoutToPhi(layout.get()),
{}));
float* a_data = dense_tensor.mutable_data<float>(::phi::CPUPlace());
for (int64_t i = 0; i < dense_tensor.numel(); ++i) {
a_data[i] = value.get();
}
return dense_tensor;
}
::phi::DenseTensor CreateGPUDenseTensor( ::phi::DenseTensor CreateGPUDenseTensor(
const ::phi::GPUContext& context, const ::phi::GPUContext& context,
host_context::Attribute<std::vector<int64_t>> dims, host_context::Attribute<std::vector<int64_t>> dims,
......
...@@ -32,6 +32,13 @@ namespace phi { ...@@ -32,6 +32,13 @@ namespace phi {
host_context::Attribute<::infrt::LayoutType> layout, host_context::Attribute<::infrt::LayoutType> layout,
host_context::Attribute<::infrt::PrecisionType> precision); host_context::Attribute<::infrt::PrecisionType> precision);
::phi::DenseTensor CreateInitedDenseTensorF32(
const ::phi::CPUContext& context,
host_context::Attribute<std::vector<int64_t>> dims,
host_context::Attribute<std::vector<int64_t>> lod,
host_context::Attribute<::infrt::LayoutType> layout,
host_context::Attribute<float> value);
::phi::DenseTensor CreateGPUDenseTensor( ::phi::DenseTensor CreateGPUDenseTensor(
const ::phi::GPUContext& context, const ::phi::GPUContext& context,
host_context::Attribute<std::vector<int64_t>> dims, host_context::Attribute<std::vector<int64_t>> dims,
......
...@@ -38,6 +38,12 @@ void RegisterPhiKernels(host_context::KernelRegistry* registry) { ...@@ -38,6 +38,12 @@ void RegisterPhiKernels(host_context::KernelRegistry* registry) {
"phi_dt.create_dense_tensor.cpu", "phi_dt.create_dense_tensor.cpu",
INFRT_KERNEL(infrt::kernel::phi::CreateDenseTensor), INFRT_KERNEL(infrt::kernel::phi::CreateDenseTensor),
{"dims", "lod", "layout", "precision"}); {"dims", "lod", "layout", "precision"});
registry->AddKernelWithAttrs(
"phi_dt.create_inited_dense_tensor.cpu.f32",
INFRT_KERNEL(infrt::kernel::phi::CreateInitedDenseTensorF32),
{"dims", "lod", "layout", "value"});
registry->AddKernelWithAttrs( registry->AddKernelWithAttrs(
"phi_dt.fill_dense_tensor.f32", "phi_dt.fill_dense_tensor.f32",
INFRT_KERNEL(infrt::kernel::phi::FillDenseTensorF32), INFRT_KERNEL(infrt::kernel::phi::FillDenseTensorF32),
......
...@@ -7,26 +7,20 @@ module { ...@@ -7,26 +7,20 @@ module {
%Y, %MeanOut, %VarianceOut = "pd.batch_norm"(%4, %arg1, %arg2, %arg3, %arg4) {data_layout = "NCHW", epsilon = 9.99999974E-6 : f32, momentum = 0.899999976 : f32} : (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) %Y, %MeanOut, %VarianceOut = "pd.batch_norm"(%4, %arg1, %arg2, %arg3, %arg4) {data_layout = "NCHW", epsilon = 9.99999974E-6 : f32, momentum = 0.899999976 : f32} : (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>)
%out = "pd.relu"(%Y) : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW> %out = "pd.relu"(%Y) : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
%5 = "pd.elementwise_add"(%out, %out) {axis = -1:si32} : (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW> %5 = "pd.elementwise_add"(%out, %out) {axis = -1:si32} : (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
infrt.return %5 : !infrt.dense_tensor<CPU, FP32, NCHW> %6 = "pd.pool2d"(%5) {adaptive = false, pooling_type = "avg", ceil_mode = false, data_format = "NCHW", exclusive = true, global_pooling = false, ksize = [3 : i32, 3 : i32], padding_algorithm = "EXPLICIT", paddings = [1 : i32, 1 : i32], strides = [2 : i32, 2 : i32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
%7 = "pd.flatten_contiguous_range"(%6) {start_axis = 1 : si32, stop_axis = 3 : si32} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
infrt.return %7 : !infrt.dense_tensor<CPU, FP32, NCHW>
} }
func @main() { func @main() {
%ctx = "phi_dt.create_context.cpu" (): () -> !phi.context<CPU> %ctx = "phi_dt.create_context.cpu" (): () -> !phi.context<CPU>
%t = "phi_dt.create_dense_tensor.cpu" (%ctx) {precision=#infrt.precision<FP32>, layout=#infrt.layout<NCHW>, lod=[1], dims=[1, 3, 8, 8]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>) %t = "phi_dt.create_inited_dense_tensor.cpu.f32"(%ctx) {value=3.8:f32, layout=#infrt.layout<NCHW>, lod=[1], dims=[1, 3, 8, 8]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
"phi_dt.fill_dense_tensor.f32"(%t) {value=[3.8:f32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> () %filter = "phi_dt.create_inited_dense_tensor.cpu.f32" (%ctx) {value=3.8:f32, layout=#infrt.layout<NCHW>, lod=[1], dims=[3, 3, 8, 8]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
%filter = "phi_dt.create_dense_tensor.cpu" (%ctx) {precision=#infrt.precision<FP32>, layout=#infrt.layout<NCHW>, lod=[1], dims=[3, 3, 8, 8]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>) %bias = "phi_dt.create_inited_dense_tensor.cpu.f32" (%ctx) {value=1.5:f32, layout=#infrt.layout<NCHW>, lod=[1], dims=[3]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
"phi_dt.fill_dense_tensor.f32"(%filter) {value=[3.8:f32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> () %mean = "phi_dt.create_inited_dense_tensor.cpu.f32" (%ctx) {value=3.8:f32, layout=#infrt.layout<NCHW>, lod=[1], dims=[3]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
%bias = "phi_dt.create_dense_tensor.cpu" (%ctx) {precision=#infrt.precision<FP32>, layout=#infrt.layout<NCHW>, lod=[1], dims=[3]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>) %scale = "phi_dt.create_inited_dense_tensor.cpu.f32" (%ctx) {value=3.8:f32, layout=#infrt.layout<NCHW>, lod=[1], dims=[3]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
"phi_dt.fill_dense_tensor.f32"(%bias) {value=[1.5:f32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> () %var = "phi_dt.create_inited_dense_tensor.cpu.f32" (%ctx) {value=3.8:f32, layout=#infrt.layout<NCHW>, lod=[1], dims=[3]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
%mean = "phi_dt.create_dense_tensor.cpu" (%ctx) {precision=#infrt.precision<FP32>, layout=#infrt.layout<NCHW>, lod=[1], dims=[3]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
"phi_dt.fill_dense_tensor.f32"(%mean) {value=[3.5:f32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
%scale = "phi_dt.create_dense_tensor.cpu" (%ctx) {precision=#infrt.precision<FP32>, layout=#infrt.layout<NCHW>, lod=[1], dims=[3]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
"phi_dt.fill_dense_tensor.f32"(%scale) {value=[1.0:f32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
%var = "phi_dt.create_dense_tensor.cpu" (%ctx) {precision=#infrt.precision<FP32>, layout=#infrt.layout<NCHW>, lod=[1], dims=[3]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
"phi_dt.fill_dense_tensor.f32"(%var) {value=[0.0:f32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
%2 = infrt.call@predict(%t, %filter, %bias, %mean, %scale, %var) : (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>,!infrt.dense_tensor<CPU, FP32, NCHW>,!infrt.dense_tensor<CPU, FP32, NCHW>,!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW> %2 = infrt.call@predict(%t, %filter, %bias, %mean, %scale, %var) : (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>,!infrt.dense_tensor<CPU, FP32, NCHW>,!infrt.dense_tensor<CPU, FP32, NCHW>,!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
//phi_dt.print_tensor(%t : !infrt.dense_tensor<CPU, FP32, NCHW>)
phi_dt.print_tensor(%2 : !infrt.dense_tensor<CPU, FP32, NCHW>) phi_dt.print_tensor(%2 : !infrt.dense_tensor<CPU, FP32, NCHW>)
infrt.return infrt.return
} }
......
...@@ -340,8 +340,8 @@ def convert_op_proto_into_mlir(op_descs): ...@@ -340,8 +340,8 @@ def convert_op_proto_into_mlir(op_descs):
attr_type_ = attr_mlir_converter[op_proto[ATTRS][attr][ attr_type_ = attr_mlir_converter[op_proto[ATTRS][attr][
TYPE]] TYPE]]
if (attr_type_ in [ if (attr_type_ in [
'I32ArrayAttr', 'F32ArrayAttr', 'StrArrayAttr', 'StrAttr', 'I32ArrayAttr', 'F32ArrayAttr',
'BoolArrayAttr', 'I64ArrayAttr' 'StrArrayAttr', 'BoolArrayAttr', 'I64ArrayAttr'
]): ]):
attr_list = attr_type_ + ":$" + attr + "," attr_list = attr_type_ + ":$" + attr + ","
ARGUMENTS += attr_list ARGUMENTS += attr_list
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册