未验证 提交 4d1ce184 编写于 作者: Y YuanRisheng 提交者: GitHub

[PTen]Move flatten kernel to new directory (#38255)

* move flatten

* fix bugs of test

* modify header file

* add copy declare

* fix compile bugs
上级 56e2a6a6
...@@ -134,7 +134,8 @@ class FlattenContiguousRangeKernel : public framework::OpKernel<T> { ...@@ -134,7 +134,8 @@ class FlattenContiguousRangeKernel : public framework::OpKernel<T> {
auto pt_out = paddle::experimental::MakePtenDenseTensor(*out); auto pt_out = paddle::experimental::MakePtenDenseTensor(*out);
// call new kernel // call new kernel
pten::Flatten<T>(dev_ctx, *pt_x.get(), start_axis, stop_axis, pt_out.get()); pten::Flatten<T, DeviceContext>(dev_ctx, *pt_x.get(), start_axis, stop_axis,
pt_out.get());
} }
}; };
......
...@@ -24,7 +24,7 @@ add_subdirectory(tests) ...@@ -24,7 +24,7 @@ add_subdirectory(tests)
# make an unity target for compile deps # make an unity target for compile deps
set(PTEN_DEPS convert_utils dense_tensor pten_context kernel_factory kernel_context) set(PTEN_DEPS convert_utils dense_tensor pten_context kernel_factory kernel_context)
set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu conj_kernel_cpu scale_kernel_cpu full_kernel_cpu) set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu conj_kernel_cpu scale_kernel_cpu full_kernel_cpu flatten)
set(PTEN_DEPS ${PTEN_DEPS} nary unary binary) set(PTEN_DEPS ${PTEN_DEPS} nary unary binary)
if(WITH_GPU OR WITH_ROCM) if(WITH_GPU OR WITH_ROCM)
set(PTEN_DEPS ${PTEN_DEPS} math_gpu linalg_gpu manipulation_gpu conj_kernel_gpu scale_kernel_gpu full_kernel_gpu) set(PTEN_DEPS ${PTEN_DEPS} math_gpu linalg_gpu manipulation_gpu conj_kernel_gpu scale_kernel_gpu full_kernel_gpu)
......
...@@ -22,6 +22,7 @@ limitations under the License. */ ...@@ -22,6 +22,7 @@ limitations under the License. */
PT_DECLARE_KERNEL(full_like, CPU, ALL_LAYOUT); PT_DECLARE_KERNEL(full_like, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(dot, CPU, ALL_LAYOUT); PT_DECLARE_KERNEL(dot, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(cast, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(flatten, CPU, ALL_LAYOUT); PT_DECLARE_KERNEL(flatten, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(sign, CPU, ALL_LAYOUT); PT_DECLARE_KERNEL(sign, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(scale, CPU, ALL_LAYOUT); PT_DECLARE_KERNEL(scale, CPU, ALL_LAYOUT);
...@@ -30,6 +31,7 @@ PT_DECLARE_KERNEL(conj, CPU, ALL_LAYOUT); ...@@ -30,6 +31,7 @@ PT_DECLARE_KERNEL(conj, CPU, ALL_LAYOUT);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_DECLARE_KERNEL(full_like, GPU, ALL_LAYOUT); PT_DECLARE_KERNEL(full_like, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(dot, GPU, ALL_LAYOUT); PT_DECLARE_KERNEL(dot, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(cast, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(flatten, GPU, ALL_LAYOUT); PT_DECLARE_KERNEL(flatten, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(sign, GPU, ALL_LAYOUT); PT_DECLARE_KERNEL(sign, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(scale, GPU, ALL_LAYOUT); PT_DECLARE_KERNEL(scale, GPU, ALL_LAYOUT);
...@@ -38,4 +40,5 @@ PT_DECLARE_KERNEL(conj, GPU, ALL_LAYOUT); ...@@ -38,4 +40,5 @@ PT_DECLARE_KERNEL(conj, GPU, ALL_LAYOUT);
#ifdef PADDLE_WITH_XPU #ifdef PADDLE_WITH_XPU
PT_DECLARE_KERNEL(flatten, XPU, ALL_LAYOUT); PT_DECLARE_KERNEL(flatten, XPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(reshape, XPU, ALL_LAYOUT);
#endif #endif
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include "paddle/pten/api/lib/utils/storage.h" #include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/include/infermeta.h" #include "paddle/pten/include/infermeta.h"
#include "paddle/pten/kernels/cpu/manipulation.h" #include "paddle/pten/kernels/cpu/manipulation.h"
#include "paddle/pten/kernels/flatten_kernel.h"
#include "paddle/pten/kernels/gpu/manipulation.h" #include "paddle/pten/kernels/gpu/manipulation.h"
#include "paddle/pten/kernels/xpu/manipulation.h" #include "paddle/pten/kernels/xpu/manipulation.h"
...@@ -33,7 +34,7 @@ DenseTensor Flatten(const ContextT& dev_ctx, ...@@ -33,7 +34,7 @@ DenseTensor Flatten(const ContextT& dev_ctx,
pten::make_intrusive<paddle::experimental::SharedStorage>( pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()), dev_ctx.GetPlace()),
std::move(out_meta)); std::move(out_meta));
Flatten<T>(dev_ctx, x, start_axis, stop_axis, &dense_out); Flatten<T, ContextT>(dev_ctx, x, start_axis, stop_axis, &dense_out);
return dense_out; return dense_out;
} }
......
...@@ -17,3 +17,18 @@ endif() ...@@ -17,3 +17,18 @@ endif()
if(WITH_XPU) if(WITH_XPU)
add_subdirectory(xpu) add_subdirectory(xpu)
endif() endif()
# Base dependencies shared by every build variant of the flatten kernel.
set(FLATTEN_DEPS dense_tensor kernel_context kernel_factory utils_cpu unary)
# Add the device-specific copy/utils library: GPU and ROCm share utils_gpu,
# XPU uses utils_xpu, plain CPU builds need nothing beyond utils_cpu above.
if(WITH_GPU OR WITH_ROCM)
set(FLATTEN_DEPS ${FLATTEN_DEPS} utils_gpu)
elseif(WITH_XPU)
set(FLATTEN_DEPS ${FLATTEN_DEPS} utils_xpu)
endif()
# Pick the library wrapper that matches the toolchain (nvcc / hipcc / plain
# C++); all three compile the same flatten_kernel.cc source.
if(WITH_GPU)
nv_library(flatten SRCS flatten_kernel.cc DEPS ${FLATTEN_DEPS})
elseif(WITH_ROCM)
hip_library(flatten SRCS flatten_kernel.cc DEPS ${FLATTEN_DEPS})
else()
cc_library(flatten SRCS flatten_kernel.cc DEPS ${FLATTEN_DEPS})
endif()
...@@ -21,31 +21,6 @@ ...@@ -21,31 +21,6 @@
namespace pten { namespace pten {
template <typename T>
void Flatten(const CPUContext& dev_ctx,
const DenseTensor& x,
int start_axis,
int stop_axis,
DenseTensor* out) {
auto out_dims = out->dims();
pten::Copy(dev_ctx, x, false, out);
out->Resize(out_dims);
}
// TODO(yuanrisheng): this kernel is for training and xshape is a Intermediate
// Output Tensor,
// is there a more flexible way to deal with this case?
template <typename T>
void FlattenWithXShape(const CPUContext& dev_ctx,
const DenseTensor& x,
int start_axis,
int stop_axis,
DenseTensor* out,
DenseTensor* xshape) {
Flatten<T>(dev_ctx, x, start_axis, stop_axis, out);
general::SetXShape(x, xshape);
}
void Reshape(const CPUContext& dev_ctx, void Reshape(const CPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const ScalarArray& shape, const ScalarArray& shape,
...@@ -83,27 +58,6 @@ void Cast(const CPUContext& dev_ctx, ...@@ -83,27 +58,6 @@ void Cast(const CPUContext& dev_ctx,
} // namespace pten } // namespace pten
PT_REGISTER_KERNEL(flatten,
CPU,
ALL_LAYOUT,
pten::Flatten,
float,
double,
uint8_t,
int8_t,
int,
int64_t) {}
PT_REGISTER_KERNEL(flatten_with_xshape,
CPU,
ALL_LAYOUT,
pten::FlattenWithXShape,
float,
double,
uint8_t,
int8_t,
int,
int64_t) {}
PT_REGISTER_KERNEL(cast, PT_REGISTER_KERNEL(cast,
CPU, CPU,
ALL_LAYOUT, ALL_LAYOUT,
......
...@@ -21,13 +21,6 @@ limitations under the License. */ ...@@ -21,13 +21,6 @@ limitations under the License. */
namespace pten { namespace pten {
template <typename T>
void Flatten(const CPUContext& dev_ctx,
const DenseTensor& x,
int start_axis,
int stop_axis,
DenseTensor* out);
template <typename T> template <typename T>
void Cast(const CPUContext& dev_ctx, void Cast(const CPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
......
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/pten/kernels/flatten_kernel.h"
#include "paddle/pten/api/ext/dispatch.h"
#include "paddle/pten/backends/all_context.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/infermeta/unary.h"
#include "paddle/pten/kernels/cpu/utils.h"
#include "paddle/pten/kernels/functions/common_shape.h"
#include "paddle/pten/kernels/gpu/utils.h"
#include "paddle/pten/kernels/xpu/utils.h"
namespace pten {
// Copies `x` into `out` and then restores `out`'s pre-computed flattened
// dims. The flatten itself is a metadata change: `out` already carries the
// target dims (set by infermeta before the kernel runs), and pten::Copy may
// overwrite them, so they are saved up front and re-applied afterwards.
// `start_axis`/`stop_axis` are part of the kernel signature but the dim
// computation using them happens in infermeta, not here.
template <typename T, typename ContextT>
void Flatten(const ContextT& dev_ctx,
             const DenseTensor& x,
             int start_axis,
             int stop_axis,
             DenseTensor* out) {
  const auto flattened_dims = out->dims();  // save before Copy clobbers them
  pten::Copy(dev_ctx, x, false, out);
  out->Resize(flattened_dims);
}
// Training variant of Flatten: also emits `xshape`, an intermediate output
// recording x's original shape (prefixed with a leading 0 by SetXShape),
// presumably so the backward pass can restore it — TODO confirm with the
// flatten_grad op.
// TODO(yuanrisheng): this kernel is for training and xshape is an
// Intermediate Output Tensor — is there a more flexible way to deal with
// this case?
template <typename T, typename ContextT>
void FlattenWithXShape(const ContextT& dev_ctx,
                       const DenseTensor& x,
                       int start_axis,
                       int stop_axis,
                       DenseTensor* out,
                       DenseTensor* xshape) {
  // First do the regular flatten into `out` ...
  Flatten<T, ContextT>(dev_ctx, x, start_axis, stop_axis, out);
  // ... then capture x's shape and LoD metadata into `xshape`.
  functions::SetXShape(x, xshape);
}
} // namespace pten
// CPU registrations: flatten and its training variant (flatten_with_xshape)
// for the common numeric element types.
PT_REGISTER_CTX_KERNEL(flatten,
                       CPU,
                       ALL_LAYOUT,
                       pten::Flatten,
                       float,
                       double,
                       uint8_t,
                       int8_t,
                       int,
                       int64_t) {}
PT_REGISTER_CTX_KERNEL(flatten_with_xshape,
                       CPU,
                       ALL_LAYOUT,
                       pten::FlattenWithXShape,
                       float,
                       double,
                       uint8_t,
                       int8_t,
                       int,
                       int64_t) {}

// GPU registrations (CUDA and HIP): same type list plus float16.
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_REGISTER_CTX_KERNEL(flatten,
                       GPU,
                       ALL_LAYOUT,
                       pten::Flatten,
                       float,
                       paddle::platform::float16,
                       double,
                       uint8_t,
                       int8_t,
                       int,
                       int64_t) {}
PT_REGISTER_CTX_KERNEL(flatten_with_xshape,
                       GPU,
                       ALL_LAYOUT,
                       pten::FlattenWithXShape,
                       float,
                       paddle::platform::float16,
                       double,
                       uint8_t,
                       int8_t,
                       int,
                       int64_t) {}
#endif

// XPU registrations: same type list as GPU (including float16).
#ifdef PADDLE_WITH_XPU
PT_REGISTER_CTX_KERNEL(flatten,
                       XPU,
                       ALL_LAYOUT,
                       pten::Flatten,
                       float,
                       paddle::platform::float16,
                       double,
                       uint8_t,
                       int8_t,
                       int,
                       int64_t) {}
PT_REGISTER_CTX_KERNEL(flatten_with_xshape,
                       XPU,
                       ALL_LAYOUT,
                       pten::FlattenWithXShape,
                       float,
                       paddle::platform::float16,
                       double,
                       uint8_t,
                       int8_t,
                       int,
                       int64_t) {}
#endif
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/pten/core/dense_tensor.h"
namespace pten {
// Flattens `x` into `out`. The actual data movement is a copy; `out` is
// expected to already carry the flattened dims, which are preserved across
// the copy. `start_axis`/`stop_axis` select the dimension range to collapse
// (the dim computation itself lives in infermeta — TODO confirm).
template <typename T, typename ContextT>
void Flatten(const ContextT& dev_ctx,
             const DenseTensor& x,
             int start_axis,
             int stop_axis,
             DenseTensor* out);

// Same as Flatten, but additionally writes x's original shape — with a
// leading 0 prepended — and its LoD into `xshape`, an intermediate output
// used by training.
template <typename T, typename ContextT>
void FlattenWithXShape(const ContextT& dev_ctx,
                       const DenseTensor& x,
                       int start_axis,
                       int stop_axis,
                       DenseTensor* out,
                       DenseTensor* xshape);
} // namespace pten
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/pten/core/dense_tensor.h"
namespace pten {
namespace functions {
// Records x's shape into `xshape` as [0, d0, d1, ..., dn-1] (a leading 0
// followed by x's dims) and copies x's LoD over. No tensor data is touched —
// only shape/LoD metadata on `xshape` is written.
inline void SetXShape(const DenseTensor& x, DenseTensor* xshape) {
  const auto& input_dims = x.meta().dims;
  const int rank = input_dims.size();
  // Slot 0 stays 0; slots 1..rank mirror x's dims, shifted by one.
  std::vector<int64_t> shifted_dims(rank + 1, 0);
  for (int i = 1; i <= rank; ++i) {
    shifted_dims[i] = input_dims[i - 1];
  }
  xshape->Resize(paddle::framework::make_ddim(shifted_dims));
  xshape->ResetLoD(x.meta().lod);
}
} // namespace functions
} // namespace pten
...@@ -21,31 +21,6 @@ ...@@ -21,31 +21,6 @@
namespace pten { namespace pten {
template <typename T>
void Flatten(const GPUContext& dev_ctx,
const DenseTensor& x,
int start_axis,
int stop_axis,
DenseTensor* out) {
auto out_dims = out->dims();
pten::Copy(dev_ctx, x, false, out);
out->Resize(out_dims);
}
// TODO(yuanrisheng): this kernel is for training and xshape is a Intermediate
// Output Tensor,
// is there a more flexible way to deal with this case?
template <typename T>
void FlattenWithXShape(const GPUContext& dev_ctx,
const DenseTensor& x,
int start_axis,
int stop_axis,
DenseTensor* out,
DenseTensor* xshape) {
Flatten<T>(dev_ctx, x, start_axis, stop_axis, out);
general::SetXShape(x, xshape);
}
void Reshape(const GPUContext& dev_ctx, void Reshape(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const ScalarArray& shape, const ScalarArray& shape,
...@@ -84,28 +59,6 @@ void Cast(const GPUContext& dev_ctx, ...@@ -84,28 +59,6 @@ void Cast(const GPUContext& dev_ctx,
using float16 = paddle::platform::float16; using float16 = paddle::platform::float16;
PT_REGISTER_KERNEL(flatten,
GPU,
ALL_LAYOUT,
pten::Flatten,
float,
float16,
double,
uint8_t,
int8_t,
int,
int64_t) {}
PT_REGISTER_KERNEL(flatten_with_xshape,
GPU,
ALL_LAYOUT,
pten::FlattenWithXShape,
float,
double,
uint8_t,
int8_t,
int,
int64_t) {}
#define PTEN_REGISTER_CAST_CUDA_BASE_TYPE(op_name, ...) \ #define PTEN_REGISTER_CAST_CUDA_BASE_TYPE(op_name, ...) \
PT_REGISTER_KERNEL(cast, \ PT_REGISTER_KERNEL(cast, \
GPU, \ GPU, \
......
...@@ -24,13 +24,6 @@ ...@@ -24,13 +24,6 @@
namespace pten { namespace pten {
template <typename T>
void Flatten(const GPUContext& dev_ctx,
const DenseTensor& x,
int start_axis,
int stop_axis,
DenseTensor* out);
template <typename T> template <typename T>
void Cast(const GPUContext& dev_ctx, void Cast(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
......
...@@ -19,38 +19,6 @@ ...@@ -19,38 +19,6 @@
namespace pten { namespace pten {
template <typename T>
void Flatten(const XPUContext& dev_ctx,
const DenseTensor& x,
int start_axis,
int stop_axis,
DenseTensor* out) {
auto out_dims = out->dims();
pten::Copy(dev_ctx, x, false, out);
out->Resize(out_dims);
}
// TODO(yuanrisheng): this kernel is for training and xshape is a Intermediate
// Output Tensor,
// is there a more flexible way to deal with this case?
template <typename T>
void FlattenWithXShape(const XPUContext& dev_ctx,
const DenseTensor& x,
int start_axis,
int stop_axis,
DenseTensor* out,
DenseTensor* xshape) {
Flatten<T>(dev_ctx, x, start_axis, stop_axis, out);
const auto& in_dims = x.dims();
std::vector<int64_t> xshape_dims(in_dims.size() + 1);
xshape_dims[0] = 0;
for (int i = 0; i < in_dims.size(); ++i) {
xshape_dims[i + 1] = in_dims[i];
}
xshape->Resize(paddle::framework::make_ddim(xshape_dims));
xshape->ResetLoD(x.lod());
}
void Reshape(const XPUContext& dev_ctx, void Reshape(const XPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const ScalarArray& shape, const ScalarArray& shape,
...@@ -76,29 +44,5 @@ void ReshapeWithXShape(const XPUContext& dev_ctx, ...@@ -76,29 +44,5 @@ void ReshapeWithXShape(const XPUContext& dev_ctx,
} // namespace pten } // namespace pten
PT_REGISTER_KERNEL(flatten,
XPU,
ALL_LAYOUT,
pten::Flatten,
float,
paddle::platform::float16,
double,
uint8_t,
int8_t,
int,
int64_t) {}
PT_REGISTER_KERNEL(flatten_with_xshape,
XPU,
ALL_LAYOUT,
pten::FlattenWithXShape,
float,
paddle::platform::float16,
double,
uint8_t,
int8_t,
int,
int64_t) {}
PT_REGISTER_NO_TEMPLATE_KERNEL( PT_REGISTER_NO_TEMPLATE_KERNEL(
reshape, XPU, ALL_LAYOUT, pten::Reshape, ALL_DTYPE) {} reshape, XPU, ALL_LAYOUT, pten::Reshape, ALL_DTYPE) {}
...@@ -23,13 +23,6 @@ limitations under the License. */ ...@@ -23,13 +23,6 @@ limitations under the License. */
namespace pten { namespace pten {
template <typename T>
void Flatten(const XPUContext& dev_ctx,
const DenseTensor& x,
int start_axis,
int stop_axis,
DenseTensor* out);
void Reshape(const XPUContext& dev_ctx, void Reshape(const XPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const ScalarArray& shape, const ScalarArray& shape,
......
...@@ -21,6 +21,16 @@ limitations under the License. */ ...@@ -21,6 +21,16 @@ limitations under the License. */
#include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/core/kernel_registry.h"
PT_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_DECLARE_KERNEL(copy, GPU, ALL_LAYOUT);
#endif
#ifdef PADDLE_WITH_XPU
PT_DECLARE_KERNEL(copy, XPU, ALL_LAYOUT);
#endif
namespace pten { namespace pten {
namespace tests { namespace tests {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册