diff --git a/paddle/fluid/operators/flatten_op.cc b/paddle/fluid/operators/flatten_op.cc
index 14f2e9061b742f002d2a6dbb1fa26d84ee81afc4..517422af1f6aad52e42fb4951a97ae2cff798f47 100644
--- a/paddle/fluid/operators/flatten_op.cc
+++ b/paddle/fluid/operators/flatten_op.cc
@@ -79,14 +79,6 @@ class FlattenOp : public framework::OperatorWithKernel {
       const framework::ExecutionContext &ctx) const override {
     auto input_data_type =
         framework::OperatorWithKernel::IndicateVarDataType(ctx, "X");
-
-    //#ifdef PADDLE_WITH_MKLDNN
-    // if (this->CanMKLDNNBeUsed(ctx, input_data_type)) {
-    // return framework::OpKernelType(input_data_type, ctx.GetPlace(),
-    // framework::DataLayout::kMKLDNN,
-    // framework::LibraryType::kMKLDNN);
-    // }
-    //#endif
     return framework::OpKernelType(input_data_type, ctx.GetPlace());
   }
 };
@@ -157,14 +149,6 @@ class FlattenGradOp : public framework::OperatorWithKernel {
       const framework::ExecutionContext &ctx) const override {
     auto input_data_type = framework::OperatorWithKernel::IndicateVarDataType(
         ctx, framework::GradVarName("Out"));
-
-    //#ifdef PADDLE_WITH_MKLDNN
-    // if (this->CanMKLDNNBeUsed(ctx, input_data_type)) {
-    // return framework::OpKernelType(input_data_type, ctx.GetPlace(),
-    // framework::DataLayout::kMKLDNN,
-    // framework::LibraryType::kMKLDNN);
-    // }
-    //#endif
     return framework::OpKernelType(input_data_type, ctx.GetPlace());
   }
 };
@@ -227,14 +211,6 @@ class Flatten2Op : public framework::OperatorWithKernel {
       const framework::ExecutionContext &ctx) const override {
     auto input_data_type =
         framework::OperatorWithKernel::IndicateVarDataType(ctx, "X");
-
-    //#ifdef PADDLE_WITH_MKLDNN
-    // if (this->CanMKLDNNBeUsed(ctx, input_data_type)) {
-    // return framework::OpKernelType(input_data_type, ctx.GetPlace(),
-    // framework::DataLayout::kMKLDNN,
-    // framework::LibraryType::kMKLDNN);
-    // }
-    //#endif
     return framework::OpKernelType(input_data_type, ctx.GetPlace());
   }
 };
@@ -285,14 +261,6 @@ class Flatten2GradOp : public framework::OperatorWithKernel {
       const framework::ExecutionContext &ctx) const override {
     auto input_data_type = framework::OperatorWithKernel::IndicateVarDataType(
         ctx, framework::GradVarName("Out"));
-
-    //#ifdef PADDLE_WITH_MKLDNN
-    // if (this->CanMKLDNNBeUsed(ctx, input_data_type)) {
-    // return framework::OpKernelType(input_data_type, ctx.GetPlace(),
-    // framework::DataLayout::kMKLDNN,
-    // framework::LibraryType::kMKLDNN);
-    // }
-    //#endif
     return framework::OpKernelType(input_data_type, ctx.GetPlace());
   }
 };
@@ -365,6 +333,20 @@ class FlattenContiguousRangeOp : public framework::OperatorWithKernel {
 
     return out_shape;
   }
+
+  // Chooses the pten kernel signature for this op. With an "XShape" output
+  // the ".mid" kernel (outputs Out and XShape) is selected; otherwise the
+  // plain kernel that only outputs Out. Both take X plus the two axis attrs.
+  framework::KernelSignature GetExpectedPtenKernelArgs(
+      const framework::ExecutionContext &ctx) const override {
+    if (!ctx.HasOutput("XShape")) {
+      return framework::KernelSignature("flatten_contiguous_range", {"X"},
+                                        {"start_axis", "stop_axis"}, {"Out"});
+    }
+    return framework::KernelSignature("flatten_contiguous_range.mid", {"X"},
+                                      {"start_axis", "stop_axis"},
+                                      {"Out", "XShape"});
+  }
 };
 
 class FlattenContiguousRangeOpMaker : public FlattenOpMaker {
diff --git a/paddle/fluid/operators/flatten_op.h b/paddle/fluid/operators/flatten_op.h
index efcb0cbe2e2a8d8bbf964cc4f2d2496e6a6fa991..7d08a95821138e0a96fcc246119f5d2e41a25ed2 100644
--- a/paddle/fluid/operators/flatten_op.h
+++ b/paddle/fluid/operators/flatten_op.h
@@ -15,10 +15,13 @@ limitations under the License.
*/
 #pragma once
 #include
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/pten_utils.h"
 #include "paddle/fluid/operators/math/blas.h"
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/pooling.h"
 #include "paddle/fluid/platform/device_context.h"
+#include "paddle/pten/include/core.h"
+#include "paddle/pten/include/manipulation.h"
 
 namespace paddle {
 namespace operators {
@@ -122,13 +125,19 @@ class FlattenContiguousRangeKernel : public framework::OpKernel {
   void Compute(const framework::ExecutionContext &context) const override {
     auto *in = context.Input("X");
     auto *out = context.Output("Out");
-    auto out_dims = out->dims();
 
-    out->mutable_data(context.GetPlace(), in->type());
-    framework::TensorCopy(
-        *in, context.GetPlace(),
-        context.template device_context(), out);
-    out->Resize(out_dims);
+    // Attributes and device context come from the op's execution context.
+    auto &first_axis = context.Attr("start_axis");
+    auto &last_axis = context.Attr("stop_axis");
+    auto &device = context.device_context();
+
+    // Build pten dense tensors from the op's input and output.
+    auto dense_in = paddle::experimental::MakePtenDenseTensor(*in);
+    auto dense_out = paddle::experimental::MakePtenDenseTensor(*out);
+
+    // Hand off to the pten Flatten kernel.
+    pten::Flatten(device, *dense_in.get(), first_axis, last_axis,
+                  dense_out.get());
   }
 };
 
diff --git a/paddle/pten/CMakeLists.txt b/paddle/pten/CMakeLists.txt
index 0444fa593c0ac392de49a3b44d8a1a77a96aabd5..e72ec1f8ae65a374b01ada040397b3eb7843b45c 100644
--- a/paddle/pten/CMakeLists.txt
+++ b/paddle/pten/CMakeLists.txt
@@ -17,5 +17,7 @@ set(PTEN_DEPS ${PTEN_DEPS} unary binary)
 if(WITH_GPU OR WITH_ROCM)
   set(PTEN_DEPS ${PTEN_DEPS} math_cuda linalg_cuda creation_cuda manipulation_cuda)
 endif()
-
+if(WITH_XPU)
+  set(PTEN_DEPS ${PTEN_DEPS} manipulation_xpu)
+endif()
 cc_library(pten SRCS all.cc DEPS ${PTEN_DEPS})
diff --git a/paddle/pten/include/manipulation.h b/paddle/pten/include/manipulation.h
index 236f7c7af956199066954aed5a05161e21614a67..e10f296dbd0f960835f4006a431ba1dc6af9b6b8 100644
--- a/paddle/pten/include/manipulation.h
+++ b/paddle/pten/include/manipulation.h
@@ -19,6 +19,7 @@
 #include "paddle/pten/include/infershape.h"
 #include "paddle/pten/kernels/cpu/manipulation.h"
 #include "paddle/pten/kernels/cuda/manipulation.h"
+#include "paddle/pten/kernels/xpu/manipulation.h"
 
 namespace pten {
 
diff --git a/paddle/pten/kernels/xpu/CMakeLists.txt b/paddle/pten/kernels/xpu/CMakeLists.txt
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..3ba070bdd6c96cbb34abc7de6a84d65f7c6cea9f 100644
--- a/paddle/pten/kernels/xpu/CMakeLists.txt
+++ b/paddle/pten/kernels/xpu/CMakeLists.txt
@@ -0,0 +1,2 @@
+cc_library(utils_xpu SRCS utils.cc DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
+cc_library(manipulation_xpu SRCS manipulation.cc DEPS dense_tensor kernel_context kernel_factory utils_xpu unary)
diff --git a/paddle/pten/kernels/xpu/manipulation.cc b/paddle/pten/kernels/xpu/manipulation.cc
new file mode 100644
index 0000000000000000000000000000000000000000..379e459a605150b32338f9a7d82efc4d09eeb200
--- /dev/null
+++ b/paddle/pten/kernels/xpu/manipulation.cc
@@ -0,0 +1,88 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/pten/kernels/xpu/manipulation.h"
+#include "paddle/pten/infershape/unary.h"
+#include "paddle/pten/kernels/xpu/utils.h"
+
+namespace pten {
+
+// Flattens `x` into `out`. pten::Copy leaves `out` with the dims of `x`, so
+// the dims `out` carried on entry are saved first and put back afterwards.
+// `start_axis` and `stop_axis` are accepted but not read in this function.
+template
+void Flatten(const XPUContext& dev_ctx,
+             const DenseTensor& x,
+             int start_axis,
+             int stop_axis,
+             DenseTensor* out) {
+  const auto target_dims = out->dims();
+  pten::Copy(dev_ctx, x, out);
+  out->Resize(target_dims);
+}
+
+// TODO(yuanrisheng): this kernel is for training and xshape is an
+// intermediate output tensor; is there a more flexible way to deal with this
+// case?
+//
+// Runs Flatten, then sets `xshape` to the dims of `x` with a leading 0
+// prepended and gives it the lod of `x`.
+template
+void FlattenWithXShape(const XPUContext& dev_ctx,
+                       const DenseTensor& x,
+                       int start_axis,
+                       int stop_axis,
+                       DenseTensor* out,
+                       DenseTensor* xshape) {
+  Flatten(dev_ctx, x, start_axis, stop_axis, out);
+  const auto& x_dims = x.dims();
+  std::vector padded_dims(x_dims.size() + 1);
+  padded_dims[0] = 0;
+  for (int pos = 1; pos <= x_dims.size(); ++pos) {
+    padded_dims[pos] = x_dims[pos - 1];
+  }
+  xshape->Resize(paddle::framework::make_ddim(padded_dims));
+  xshape->set_lod(x.lod());
+}
+
+}  // namespace pten
+
+// TODO(chenweihang): replace by better impl
+PT_REGISTER_MODULE(ManipulationXPU);
+
+// TODO(yuanrisheng): "flatten_contiguous_range" is compatible with old kernel
+// architecture, kernel_name should be "flatten".
+PT_REGISTER_KERNEL("flatten_contiguous_range",
+                   XPU,
+                   ANY,
+                   pten::Flatten,
+                   float,
+                   paddle::platform::float16,
+                   double,
+                   uint8_t,
+                   int8_t,
+                   int,
+                   int64_t) {}
+
+PT_REGISTER_KERNEL("flatten_contiguous_range.mid",
+                   XPU,
+                   ANY,
+                   pten::FlattenWithXShape,
+                   float,
+                   paddle::platform::float16,
+                   double,
+                   uint8_t,
+                   int8_t,
+                   int,
+                   int64_t) {}
diff --git a/paddle/pten/kernels/xpu/manipulation.h b/paddle/pten/kernels/xpu/manipulation.h
new file mode 100644
index 0000000000000000000000000000000000000000..02947759b477e69cc3e249f63c693148e060cba4
--- /dev/null
+++ b/paddle/pten/kernels/xpu/manipulation.h
@@ -0,0 +1,38 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#ifdef PADDLE_WITH_XPU
+
+#include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/core/kernel_registry.h"
+
+// See Note [ Why still include the fluid headers? ]
+#include "paddle/fluid/platform/device_context.h"
+
+namespace pten {
+
+using XPUContext = paddle::platform::XPUDeviceContext;
+
+template
+void Flatten(const XPUContext& dev_ctx,
+             const DenseTensor& x,
+             int start_axis,
+             int stop_axis,
+             DenseTensor* out);
+
+}  // namespace pten
+
+#endif
diff --git a/paddle/pten/kernels/xpu/utils.cc b/paddle/pten/kernels/xpu/utils.cc
new file mode 100644
index 0000000000000000000000000000000000000000..33bdc66ff01f36a9236d562bc1eb540cb13c7234
--- /dev/null
+++ b/paddle/pten/kernels/xpu/utils.cc
@@ -0,0 +1,95 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/pten/kernels/xpu/utils.h"
+#include "paddle/fluid/memory/memcpy.h"
+#include "paddle/pten/common/data_type.h"
+#include "paddle/pten/core/convert_utils.h"
+
+namespace pten {
+
+// Copies the data of `src` into `dst` and resizes `dst` to the dims of `src`.
+// Handled place pairs are XPU->CPU, CPU->XPU and XPU->XPU; any other pair
+// throws Unimplemented. Returns early, without resizing or copying, when both
+// tensors already point at the same buffer on the same place. `dev_ctx` is
+// not read in this function.
+void Copy(const XPUDeviceContext& dev_ctx,
+          const DenseTensor& src,
+          DenseTensor* dst) {
+  auto* src_ptr = src.data();
+  auto* dst_ptr = dst->mutable_data();
+  const auto& src_place = src.place();
+  const auto& dst_place = dst->place();
+
+  if (src_ptr == dst_ptr && src_place == dst_place) {
+    VLOG(3) << "Skip copy the same data async from " << src_place << " to "
+            << dst_place;
+    return;
+  }
+  VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr;
+
+  VLOG(3) << "TensorCopy " << src.dims() << " from " << src.place() << " to "
+          << dst_place;
+  dst->Resize(src.dims());
+  // The pointer fetched at the top was obtained before the resize above, while
+  // dst still had its previous dims. Fetch it again so that the destination
+  // buffer corresponds to the dims that `size` below is computed from.
+  // NOTE(review): assumes mutable_data() sizes the buffer from the tensor's
+  // current dims - confirm against DenseTensor.
+  dst_ptr = dst->mutable_data();
+  CHECK(dst->layout() == src.layout());
+  auto size = src.numel() * paddle::framework::SizeOfType(
+                                TransToProtoVarType(src.data_type()));
+
+  if (paddle::platform::is_xpu_place(src_place) &&  // NOLINT
+      paddle::platform::is_cpu_place(dst_place)) {
+    paddle::memory::Copy(BOOST_GET_CONST(paddle::platform::CPUPlace, dst_place),
+                         dst_ptr,
+                         BOOST_GET_CONST(paddle::platform::XPUPlace, src_place),
+                         src_ptr,
+                         size);
+  } else if (paddle::platform::is_cpu_place(src_place) &&
+             paddle::platform::is_xpu_place(dst_place)) {
+    paddle::memory::Copy(BOOST_GET_CONST(paddle::platform::XPUPlace, dst_place),
+                         dst_ptr,
+                         BOOST_GET_CONST(paddle::platform::CPUPlace, src_place),
+                         src_ptr,
+                         size);
+  } else if (paddle::platform::is_xpu_place(src_place) &&
+             paddle::platform::is_xpu_place(dst_place)) {
+    // NOTE(review): equal pointers on equal places already returned above, so
+    // this branch only fires when the two XPU places differ - confirm that
+    // skipping the copy is intended then.
+    if (src_ptr == dst_ptr) {
+      VLOG(3) << "Skip copy the same data async from " << src_place << " to "
+              << dst_place;
+      return;
+    }
+    paddle::memory::Copy(BOOST_GET_CONST(paddle::platform::XPUPlace, dst_place),
+                         dst_ptr,
+                         BOOST_GET_CONST(paddle::platform::XPUPlace, src_place),
+                         src_ptr,
+                         size);
+  } else {
+    PADDLE_THROW(paddle::platform::errors::Unimplemented(
+        "Copy from %s to %s is not supported.", src_place, dst_place));
+  }
+}
+
+}  // namespace pten
+
+// TODO(chenweihang): replace by better impl
+PT_REGISTER_MODULE(UtilsXPU);
+
+PT_REGISTER_KERNEL_WITH_NO_TYPE("copy", XPU, ANY, pten::Copy) {}
diff --git a/paddle/pten/kernels/xpu/utils.h b/paddle/pten/kernels/xpu/utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..c92812ed6884243538fe05d2639f020757cb50b9
--- /dev/null
+++ b/paddle/pten/kernels/xpu/utils.h
@@ -0,0 +1,36 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#ifdef PADDLE_WITH_XPU
+
+#include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/core/kernel_registry.h"
+
+// See Note [ Why still include the fluid headers? ]
+#include "paddle/fluid/platform/device_context.h"
+namespace pten {
+
+using XPUDeviceContext = paddle::platform::XPUDeviceContext;
+
+// Copies the data of `src` into `dst` (defined in utils.cc). Only copies
+// where at least one side is an XPU place are implemented there.
+void Copy(const XPUDeviceContext& dev_ctx,
+          const DenseTensor& src,
+          DenseTensor* dst);
+
+}  // namespace pten
+
+#endif