From 4d1ce1844b45f71be6f46f6227c67f89c8b10adf Mon Sep 17 00:00:00 2001
From: YuanRisheng
Date: Wed, 22 Dec 2021 11:01:36 +0800
Subject: [PATCH] [PTen]Move flatten kernel to new directory (#38255)

* move flatten

* fix bugs of test

* modify header file

* add copy declare

* fix compile bugs
---
 paddle/fluid/operators/flatten_op.h          |   3 +-
 paddle/pten/CMakeLists.txt                   |   2 +-
 paddle/pten/api/lib/kernel_declare.h         |   3 +
 paddle/pten/include/manipulation.h           |   3 +-
 paddle/pten/kernels/CMakeLists.txt           |  15 +++
 paddle/pten/kernels/cpu/manipulation.cc      |  46 ------
 paddle/pten/kernels/cpu/manipulation.h       |   7 -
 paddle/pten/kernels/flatten_kernel.cc        | 126 ++++++++++++++++++
 paddle/pten/kernels/flatten_kernel.h         |  36 +++++
 paddle/pten/kernels/functions/common_shape.h |  34 +++++
 paddle/pten/kernels/gpu/manipulation.cu      |  47 -------
 paddle/pten/kernels/gpu/manipulation.h       |   7 -
 paddle/pten/kernels/xpu/manipulation.cc      |  56 --------
 paddle/pten/kernels/xpu/manipulation.h       |   7 -
 .../tests/kernels/test_flatten_dev_api.cc    |  10 ++
 15 files changed, 229 insertions(+), 173 deletions(-)
 create mode 100644 paddle/pten/kernels/flatten_kernel.cc
 create mode 100644 paddle/pten/kernels/flatten_kernel.h
 create mode 100644 paddle/pten/kernels/functions/common_shape.h

diff --git a/paddle/fluid/operators/flatten_op.h b/paddle/fluid/operators/flatten_op.h
index 7d08a95821..29eb579b2a 100644
--- a/paddle/fluid/operators/flatten_op.h
+++ b/paddle/fluid/operators/flatten_op.h
@@ -134,7 +134,8 @@ class FlattenContiguousRangeKernel : public framework::OpKernel<T> {
     auto pt_out = paddle::experimental::MakePtenDenseTensor(*out);
 
     // call new kernel
-    pten::Flatten<T>(dev_ctx, *pt_x.get(), start_axis, stop_axis, pt_out.get());
+    pten::Flatten<T, DeviceContext>(dev_ctx, *pt_x.get(), start_axis, stop_axis,
+                                    pt_out.get());
   }
 };
 
diff --git a/paddle/pten/CMakeLists.txt b/paddle/pten/CMakeLists.txt
index 799ec885b9..7c870ec336 100644
--- a/paddle/pten/CMakeLists.txt
+++ b/paddle/pten/CMakeLists.txt
@@ -24,7 +24,7 @@ add_subdirectory(tests)
 
 # make an unity target for compile deps
 set(PTEN_DEPS convert_utils dense_tensor pten_context kernel_factory kernel_context)
-set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu conj_kernel_cpu scale_kernel_cpu full_kernel_cpu)
+set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu conj_kernel_cpu scale_kernel_cpu full_kernel_cpu flatten)
 set(PTEN_DEPS ${PTEN_DEPS} nary unary binary)
 if(WITH_GPU OR WITH_ROCM)
   set(PTEN_DEPS ${PTEN_DEPS} math_gpu linalg_gpu manipulation_gpu conj_kernel_gpu scale_kernel_gpu full_kernel_gpu)
diff --git a/paddle/pten/api/lib/kernel_declare.h b/paddle/pten/api/lib/kernel_declare.h
index 4dbd46bff6..01a3c193a3 100644
--- a/paddle/pten/api/lib/kernel_declare.h
+++ b/paddle/pten/api/lib/kernel_declare.h
@@ -22,6 +22,7 @@ limitations under the License. */
 
 PT_DECLARE_KERNEL(full_like, CPU, ALL_LAYOUT);
 PT_DECLARE_KERNEL(dot, CPU, ALL_LAYOUT);
+PT_DECLARE_KERNEL(cast, CPU, ALL_LAYOUT);
 PT_DECLARE_KERNEL(flatten, CPU, ALL_LAYOUT);
 PT_DECLARE_KERNEL(sign, CPU, ALL_LAYOUT);
 PT_DECLARE_KERNEL(scale, CPU, ALL_LAYOUT);
@@ -30,6 +31,7 @@ PT_DECLARE_KERNEL(conj, CPU, ALL_LAYOUT);
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 PT_DECLARE_KERNEL(full_like, GPU, ALL_LAYOUT);
 PT_DECLARE_KERNEL(dot, GPU, ALL_LAYOUT);
+PT_DECLARE_KERNEL(cast, GPU, ALL_LAYOUT);
 PT_DECLARE_KERNEL(flatten, GPU, ALL_LAYOUT);
 PT_DECLARE_KERNEL(sign, GPU, ALL_LAYOUT);
 PT_DECLARE_KERNEL(scale, GPU, ALL_LAYOUT);
@@ -38,4 +40,5 @@ PT_DECLARE_KERNEL(conj, GPU, ALL_LAYOUT);
 
 #ifdef PADDLE_WITH_XPU
 PT_DECLARE_KERNEL(flatten, XPU, ALL_LAYOUT);
+PT_DECLARE_KERNEL(reshape, XPU, ALL_LAYOUT);
 #endif
diff --git a/paddle/pten/include/manipulation.h b/paddle/pten/include/manipulation.h
index e94f2a6180..80bfcef89f 100644
--- a/paddle/pten/include/manipulation.h
+++ b/paddle/pten/include/manipulation.h
@@ -18,6 +18,7 @@
 #include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/include/infermeta.h"
 #include "paddle/pten/kernels/cpu/manipulation.h"
+#include "paddle/pten/kernels/flatten_kernel.h"
 #include "paddle/pten/kernels/gpu/manipulation.h"
 #include "paddle/pten/kernels/xpu/manipulation.h"
 
@@ -33,7 +34,7 @@ DenseTensor Flatten(const ContextT& dev_ctx,
       pten::make_intrusive<paddle::experimental::SharedStorage>(
           dev_ctx.GetPlace()),
       std::move(out_meta));
-  Flatten<T>(dev_ctx, x, start_axis, stop_axis, &dense_out);
+  Flatten<T, ContextT>(dev_ctx, x, start_axis, stop_axis, &dense_out);
   return dense_out;
 }
 
diff --git a/paddle/pten/kernels/CMakeLists.txt b/paddle/pten/kernels/CMakeLists.txt
index 818ce6cb77..bacdc1ce67 100644
--- a/paddle/pten/kernels/CMakeLists.txt
+++ b/paddle/pten/kernels/CMakeLists.txt
@@ -17,3 +17,18 @@ endif()
 if(WITH_XPU)
   add_subdirectory(xpu)
 endif()
+
+set(FLATTEN_DEPS dense_tensor kernel_context kernel_factory utils_cpu unary)
+if(WITH_GPU OR WITH_ROCM)
+  set(FLATTEN_DEPS ${FLATTEN_DEPS} utils_gpu)
+elseif(WITH_XPU)
+  set(FLATTEN_DEPS ${FLATTEN_DEPS} utils_xpu)
+endif()
+
+if(WITH_GPU)
+  nv_library(flatten SRCS flatten_kernel.cc DEPS ${FLATTEN_DEPS})
+elseif(WITH_ROCM)
+  hip_library(flatten SRCS flatten_kernel.cc DEPS ${FLATTEN_DEPS})
+else()
+  cc_library(flatten SRCS flatten_kernel.cc DEPS ${FLATTEN_DEPS})
+endif()
diff --git a/paddle/pten/kernels/cpu/manipulation.cc b/paddle/pten/kernels/cpu/manipulation.cc
index 32bc8e4e35..b413882c86 100644
--- a/paddle/pten/kernels/cpu/manipulation.cc
+++ b/paddle/pten/kernels/cpu/manipulation.cc
@@ -21,31 +21,6 @@
 
 namespace pten {
 
-template <typename T>
-void Flatten(const CPUContext& dev_ctx,
-             const DenseTensor& x,
-             int start_axis,
-             int stop_axis,
-             DenseTensor* out) {
-  auto out_dims = out->dims();
-  pten::Copy(dev_ctx, x, false, out);
-  out->Resize(out_dims);
-}
-
-// TODO(yuanrisheng): this kernel is for training and xshape is a Intermediate
-// Output Tensor,
-// is there a more flexible way to deal with this case?
-template <typename T>
-void FlattenWithXShape(const CPUContext& dev_ctx,
-                       const DenseTensor& x,
-                       int start_axis,
-                       int stop_axis,
-                       DenseTensor* out,
-                       DenseTensor* xshape) {
-  Flatten<T>(dev_ctx, x, start_axis, stop_axis, out);
-  general::SetXShape(x, xshape);
-}
-
 void Reshape(const CPUContext& dev_ctx,
              const DenseTensor& x,
              const ScalarArray& shape,
@@ -83,27 +58,6 @@ void Cast(const CPUContext& dev_ctx,
 
 }  // namespace pten
 
-PT_REGISTER_KERNEL(flatten,
-                   CPU,
-                   ALL_LAYOUT,
-                   pten::Flatten,
-                   float,
-                   double,
-                   uint8_t,
-                   int8_t,
-                   int,
-                   int64_t) {}
-PT_REGISTER_KERNEL(flatten_with_xshape,
-                   CPU,
-                   ALL_LAYOUT,
-                   pten::FlattenWithXShape,
-                   float,
-                   double,
-                   uint8_t,
-                   int8_t,
-                   int,
-                   int64_t) {}
-
 PT_REGISTER_KERNEL(cast,
                    CPU,
                    ALL_LAYOUT,
diff --git a/paddle/pten/kernels/cpu/manipulation.h b/paddle/pten/kernels/cpu/manipulation.h
index 1a219dc79e..5aa5344b19 100644
--- a/paddle/pten/kernels/cpu/manipulation.h
+++ b/paddle/pten/kernels/cpu/manipulation.h
@@ -21,13 +21,6 @@ limitations under the License. */
 
 namespace pten {
 
-template <typename T>
-void Flatten(const CPUContext& dev_ctx,
-             const DenseTensor& x,
-             int start_axis,
-             int stop_axis,
-             DenseTensor* out);
-
 template <typename T>
 void Cast(const CPUContext& dev_ctx,
           const DenseTensor& x,
diff --git a/paddle/pten/kernels/flatten_kernel.cc b/paddle/pten/kernels/flatten_kernel.cc
new file mode 100644
index 0000000000..b2b5d74432
--- /dev/null
+++ b/paddle/pten/kernels/flatten_kernel.cc
@@ -0,0 +1,126 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/pten/kernels/flatten_kernel.h"
+#include "paddle/pten/api/ext/dispatch.h"
+#include "paddle/pten/backends/all_context.h"
+#include "paddle/pten/core/kernel_registry.h"
+#include "paddle/pten/infermeta/unary.h"
+#include "paddle/pten/kernels/cpu/utils.h"
+#include "paddle/pten/kernels/functions/common_shape.h"
+#include "paddle/pten/kernels/gpu/utils.h"
+#include "paddle/pten/kernels/xpu/utils.h"
+
+namespace pten {
+
+template <typename T, typename ContextT>
+void Flatten(const ContextT& dev_ctx,
+             const DenseTensor& x,
+             int start_axis,
+             int stop_axis,
+             DenseTensor* out) {
+  auto out_dims = out->dims();
+  pten::Copy(dev_ctx, x, false, out);
+  out->Resize(out_dims);
+}
+
+// TODO(yuanrisheng): this kernel is for training and xshape is an intermediate
+// output Tensor;
+// is there a more flexible way to deal with this case?
+template <typename T, typename ContextT>
+void FlattenWithXShape(const ContextT& dev_ctx,
+                       const DenseTensor& x,
+                       int start_axis,
+                       int stop_axis,
+                       DenseTensor* out,
+                       DenseTensor* xshape) {
+  Flatten<T, ContextT>(dev_ctx, x, start_axis, stop_axis, out);
+  functions::SetXShape(x, xshape);
+}
+
+}  // namespace pten
+
+PT_REGISTER_CTX_KERNEL(flatten,
+                       CPU,
+                       ALL_LAYOUT,
+                       pten::Flatten,
+                       float,
+                       double,
+                       uint8_t,
+                       int8_t,
+                       int,
+                       int64_t) {}
+
+PT_REGISTER_CTX_KERNEL(flatten_with_xshape,
+                       CPU,
+                       ALL_LAYOUT,
+                       pten::FlattenWithXShape,
+                       float,
+                       double,
+                       uint8_t,
+                       int8_t,
+                       int,
+                       int64_t) {}
+
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+PT_REGISTER_CTX_KERNEL(flatten,
+                       GPU,
+                       ALL_LAYOUT,
+                       pten::Flatten,
+                       float,
+                       paddle::platform::float16,
+                       double,
+                       uint8_t,
+                       int8_t,
+                       int,
+                       int64_t) {}
+
+PT_REGISTER_CTX_KERNEL(flatten_with_xshape,
+                       GPU,
+                       ALL_LAYOUT,
+                       pten::FlattenWithXShape,
+                       float,
+                       paddle::platform::float16,
+                       double,
+                       uint8_t,
+                       int8_t,
+                       int,
+                       int64_t) {}
+#endif
+
+#ifdef PADDLE_WITH_XPU
+PT_REGISTER_CTX_KERNEL(flatten,
+                       XPU,
+                       ALL_LAYOUT,
+                       pten::Flatten,
+                       float,
+                       paddle::platform::float16,
+                       double,
+                       uint8_t,
+                       int8_t,
+                       int,
+                       int64_t) {}
+
+PT_REGISTER_CTX_KERNEL(flatten_with_xshape,
+                       XPU,
+                       ALL_LAYOUT,
+                       pten::FlattenWithXShape,
+                       float,
+                       paddle::platform::float16,
+                       double,
+                       uint8_t,
+                       int8_t,
+                       int,
+                       int64_t) {}
+#endif
diff --git a/paddle/pten/kernels/flatten_kernel.h b/paddle/pten/kernels/flatten_kernel.h
new file mode 100644
index 0000000000..6ce0a2be20
--- /dev/null
+++ b/paddle/pten/kernels/flatten_kernel.h
@@ -0,0 +1,36 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/pten/core/dense_tensor.h"
+
+namespace pten {
+
+template <typename T, typename ContextT>
+void Flatten(const ContextT& dev_ctx,
+             const DenseTensor& x,
+             int start_axis,
+             int stop_axis,
+             DenseTensor* out);
+
+template <typename T, typename ContextT>
+void FlattenWithXShape(const ContextT& dev_ctx,
+                       const DenseTensor& x,
+                       int start_axis,
+                       int stop_axis,
+                       DenseTensor* out,
+                       DenseTensor* xshape);
+
+}  // namespace pten
diff --git a/paddle/pten/kernels/functions/common_shape.h b/paddle/pten/kernels/functions/common_shape.h
new file mode 100644
index 0000000000..3fa129014e
--- /dev/null
+++ b/paddle/pten/kernels/functions/common_shape.h
@@ -0,0 +1,34 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/pten/core/dense_tensor.h"
+
+namespace pten {
+namespace functions {
+
+inline void SetXShape(const DenseTensor& x, DenseTensor* xshape) {
+  const auto& in_dims = x.meta().dims;
+  std::vector<int64_t> xshape_dims(in_dims.size() + 1);
+  xshape_dims[0] = 0;
+  for (int i = 0; i < in_dims.size(); ++i) {
+    xshape_dims[i + 1] = in_dims[i];
+  }
+  xshape->Resize(paddle::framework::make_ddim(xshape_dims));
+  xshape->ResetLoD(x.meta().lod);
+}
+
+}  // namespace functions
+}  // namespace pten
diff --git a/paddle/pten/kernels/gpu/manipulation.cu b/paddle/pten/kernels/gpu/manipulation.cu
index 5a82e3e030..8c4aa7449a 100644
--- a/paddle/pten/kernels/gpu/manipulation.cu
+++ b/paddle/pten/kernels/gpu/manipulation.cu
@@ -21,31 +21,6 @@
 
 namespace pten {
 
-template <typename T>
-void Flatten(const GPUContext& dev_ctx,
-             const DenseTensor& x,
-             int start_axis,
-             int stop_axis,
-             DenseTensor* out) {
-  auto out_dims = out->dims();
-  pten::Copy(dev_ctx, x, false, out);
-  out->Resize(out_dims);
-}
-
-// TODO(yuanrisheng): this kernel is for training and xshape is a Intermediate
-// Output Tensor,
-// is there a more flexible way to deal with this case?
-template <typename T>
-void FlattenWithXShape(const GPUContext& dev_ctx,
-                       const DenseTensor& x,
-                       int start_axis,
-                       int stop_axis,
-                       DenseTensor* out,
-                       DenseTensor* xshape) {
-  Flatten<T>(dev_ctx, x, start_axis, stop_axis, out);
-  general::SetXShape(x, xshape);
-}
-
 void Reshape(const GPUContext& dev_ctx,
              const DenseTensor& x,
              const ScalarArray& shape,
@@ -84,28 +59,6 @@ void Cast(const GPUContext& dev_ctx,
 
 using float16 = paddle::platform::float16;
 
-PT_REGISTER_KERNEL(flatten,
-                   GPU,
-                   ALL_LAYOUT,
-                   pten::Flatten,
-                   float,
-                   float16,
-                   double,
-                   uint8_t,
-                   int8_t,
-                   int,
-                   int64_t) {}
-PT_REGISTER_KERNEL(flatten_with_xshape,
-                   GPU,
-                   ALL_LAYOUT,
-                   pten::FlattenWithXShape,
-                   float,
-                   double,
-                   uint8_t,
-                   int8_t,
-                   int,
-                   int64_t) {}
-
 #define PTEN_REGISTER_CAST_CUDA_BASE_TYPE(op_name, ...) \
   PT_REGISTER_KERNEL(cast,                              \
                      GPU,                               \
diff --git a/paddle/pten/kernels/gpu/manipulation.h b/paddle/pten/kernels/gpu/manipulation.h
index b47fadd70b..af49932c2e 100644
--- a/paddle/pten/kernels/gpu/manipulation.h
+++ b/paddle/pten/kernels/gpu/manipulation.h
@@ -24,13 +24,6 @@
 
 namespace pten {
 
-template <typename T>
-void Flatten(const GPUContext& dev_ctx,
-             const DenseTensor& x,
-             int start_axis,
-             int stop_axis,
-             DenseTensor* out);
-
 template <typename T>
 void Cast(const GPUContext& dev_ctx,
           const DenseTensor& x,
diff --git a/paddle/pten/kernels/xpu/manipulation.cc b/paddle/pten/kernels/xpu/manipulation.cc
index 70ac70371e..ecd673015a 100644
--- a/paddle/pten/kernels/xpu/manipulation.cc
+++ b/paddle/pten/kernels/xpu/manipulation.cc
@@ -19,38 +19,6 @@
 
 namespace pten {
 
-template <typename T>
-void Flatten(const XPUContext& dev_ctx,
-             const DenseTensor& x,
-             int start_axis,
-             int stop_axis,
-             DenseTensor* out) {
-  auto out_dims = out->dims();
-  pten::Copy(dev_ctx, x, false, out);
-  out->Resize(out_dims);
-}
-
-// TODO(yuanrisheng): this kernel is for training and xshape is a Intermediate
-// Output Tensor,
-// is there a more flexible way to deal with this case?
-template <typename T>
-void FlattenWithXShape(const XPUContext& dev_ctx,
-                       const DenseTensor& x,
-                       int start_axis,
-                       int stop_axis,
-                       DenseTensor* out,
-                       DenseTensor* xshape) {
-  Flatten<T>(dev_ctx, x, start_axis, stop_axis, out);
-  const auto& in_dims = x.dims();
-  std::vector<int64_t> xshape_dims(in_dims.size() + 1);
-  xshape_dims[0] = 0;
-  for (int i = 0; i < in_dims.size(); ++i) {
-    xshape_dims[i + 1] = in_dims[i];
-  }
-  xshape->Resize(paddle::framework::make_ddim(xshape_dims));
-  xshape->ResetLoD(x.lod());
-}
-
 void Reshape(const XPUContext& dev_ctx,
              const DenseTensor& x,
              const ScalarArray& shape,
@@ -76,29 +44,5 @@ void ReshapeWithXShape(const XPUContext& dev_ctx,
 
 }  // namespace pten
 
-PT_REGISTER_KERNEL(flatten,
-                   XPU,
-                   ALL_LAYOUT,
-                   pten::Flatten,
-                   float,
-                   paddle::platform::float16,
-                   double,
-                   uint8_t,
-                   int8_t,
-                   int,
-                   int64_t) {}
-
-PT_REGISTER_KERNEL(flatten_with_xshape,
-                   XPU,
-                   ALL_LAYOUT,
-                   pten::FlattenWithXShape,
-                   float,
-                   paddle::platform::float16,
-                   double,
-                   uint8_t,
-                   int8_t,
-                   int,
-                   int64_t) {}
-
 PT_REGISTER_NO_TEMPLATE_KERNEL(
     reshape, XPU, ALL_LAYOUT, pten::Reshape, ALL_DTYPE) {}
diff --git a/paddle/pten/tests/kernels/test_flatten_dev_api.cc b/paddle/pten/tests/kernels/test_flatten_dev_api.cc
index a351be3cf6..d2ff7480e9 100644
--- a/paddle/pten/tests/kernels/test_flatten_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_flatten_dev_api.cc
@@ -21,6 +21,16 @@ limitations under the License. */
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/core/kernel_registry.h"
 
+PT_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT);
+
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+PT_DECLARE_KERNEL(copy, GPU, ALL_LAYOUT);
+#endif
+
+#ifdef PADDLE_WITH_XPU
+PT_DECLARE_KERNEL(copy, XPU, ALL_LAYOUT);
+#endif
+
 namespace pten {
 namespace tests {
 
-- 
GitLab
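For reference, a minimal sketch of calling the relocated kernel through the
templated dev API declared in paddle/pten/include/manipulation.h. This is
hypothetical reviewer-side code, not part of the patch: the function name
FlattenExample and the shape comment are illustrative, the device context is
assumed to be already initialized, and the setup otherwise mirrors
test_flatten_dev_api.cc above.

// Sketch only: assumes a CPU build; pten::Flatten and pten::CPUContext are
// the names this patch declares.
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/include/manipulation.h"

pten::DenseTensor FlattenExample(const pten::CPUContext& dev_ctx,
                                 const pten::DenseTensor& x) {
  // Collapse axes [1, 2]: a [3, 2, 2, 3] input yields a [3, 4, 3] output.
  // The explicit template arguments follow the new <T, ContextT> signature.
  return pten::Flatten<float, pten::CPUContext>(
      dev_ctx, x, /*start_axis=*/1, /*stop_axis=*/2);
}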