Unverified commit 88ec08a7, authored by From00, committed by GitHub

Move Pool OPs to phi (#40208)

* Move Pool OPs to phi

* Fix CI error

* Fix conflicts
Parent 5ab2cec5
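At a glance, the hunks below follow one recurring pattern: the pooling functors and shape helpers move from fluid's paddle::operators::math into phi::funcs, hand-written InferShape bodies are replaced by DECLARE_INFER_SHAPE_FUNCTOR wired to phi InferMeta functions, and plugin/test registrations switch from USE_OP to USE_OP_ITSELF plus PD_DECLARE_KERNEL. A comment-only summary of the renames, drawn from the hunks themselves (not part of the diff):

// Recurring replacements in this commit (illustrative summary):
//   paddle::operators::math::MaxPool<T>          -> phi::funcs::MaxPool<T>
//   paddle::operators::math::Pool2dFunctor<...>  -> phi::funcs::Pool2dFunctor<...>
//   operators::UpdateKsize(&ksize, data_dims)    -> phi::funcs::UpdateKernelSize(&ksize, data_dims)
//   operators::UpdatePadding(&paddings, ...)     -> phi::funcs::UpdatePadding(&paddings, ...)
//   #include "paddle/fluid/operators/math/pooling.h"
//                                                -> #include "paddle/phi/kernels/funcs/pooling.h"
//   USE_OP(pool2d)                               -> USE_OP_ITSELF(pool2d);
//                                                   PD_DECLARE_KERNEL(pool2d, CPU, ALL_LAYOUT);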
......@@ -297,7 +297,8 @@ phi::InferMetaContext BuildInferMetaContext(InferShapeContext* ctx,
VLOG(3) << "BuildInferMetaContext: op kernel signature - " << signature;
// 2. build infermeta context
phi::InferMetaContext infer_meta_context(ctx->IsRuntime());
phi::InferMetaContext infer_meta_context(
{ctx->IsRuntime(), ctx->IsRunMKLDNNKernel()});
auto& input_names = std::get<0>(signature.args);
auto& attr_names = std::get<1>(signature.args);
......
......@@ -264,14 +264,23 @@ void BuildDygraphPhiKernelContext(
size_t start_idx = (i == 0 ? 0 : kernel_ctx->InputRangeAt(i - 1).second);
if ((it == ins.end()) &&
(input_defs[i].type_index ==
std::type_index(typeid(paddle::optional<const phi::DenseTensor&>)))) {
kernel_ctx->EmplaceBackInputWithoutSetRange(nullptr);
auto end_idx = start_idx + 1;
kernel_ctx->AssignInputRange(std::make_pair(start_idx, end_idx), i);
continue;
if (it == ins.end()) {
if (LIKELY(input_defs[i].type_index ==
std::type_index(
typeid(paddle::optional<const phi::DenseTensor&>)))) {
kernel_ctx->EmplaceBackInputWithoutSetRange(nullptr);
auto end_idx = start_idx + 1;
kernel_ctx->AssignInputRange(std::make_pair(start_idx, end_idx), i);
continue;
} else {
PADDLE_THROW(phi::errors::NotFound(
"Can not find input variable '%s' for %s OP, please check whether "
"the name setting in OpArgumentMapping is consistent with that in "
"OpMaker.",
input_names[i], pt_kernel_signature.name));
}
}
auto ins_vector = it->second;
size_t end_idx = start_idx + ins_vector.size();
......
......@@ -328,5 +328,5 @@ class Pool2dOpConverter : public OpConverter {
} // namespace inference
} // namespace paddle
USE_OP(pool2d);
USE_OP_ITSELF(pool2d);
REGISTER_TRT_OP_CONVERTER(pool2d, Pool2dOpConverter);
......@@ -224,5 +224,5 @@ class Pool3dOpConverter : public OpConverter {
} // namespace inference
} // namespace paddle
USE_OP(pool3d);
USE_OP_ITSELF(pool3d);
REGISTER_TRT_OP_CONVERTER(pool3d, Pool3dOpConverter);
......@@ -71,4 +71,4 @@ TEST(Pool2dOpConverter, avg_ceil_test) { test_pool2d(false, true, "avg"); }
} // namespace inference
} // namespace paddle
USE_OP(pool2d);
USE_OP_ITSELF(pool2d);
......@@ -13,7 +13,7 @@
// limitations under the License.
#include "paddle/fluid/inference/tensorrt/plugin/pool3d_op_plugin.h"
#include "paddle/fluid/operators/math/pooling.h"
#include "paddle/phi/kernels/funcs/pooling.h"
namespace paddle {
namespace inference {
......@@ -108,16 +108,14 @@ int Pool3DPlugin::enqueue(int batchSize, const void *const *inputs,
output_shape.insert(output_shape.begin(), batchSize);
if (pool3d_type_ == Pool3DType::max) {
paddle::operators::math::MaxPool<float> pool_process;
paddle::operators::math::Pool3dDirectCUDAFunctor<
paddle::operators::math::MaxPool<float>, float>
phi::funcs::MaxPool<float> pool_process;
phi::funcs::Pool3dDirectCUDAFunctor<phi::funcs::MaxPool<float>, float>
pool3d_forward;
pool3d_forward(idata, input_shape, output_shape, ksize_, strides_,
paddings_, true, adaptive_, odatas[0], stream, pool_process);
} else if (pool3d_type_ == Pool3DType::avg) {
paddle::operators::math::AvgPool<float> pool_process;
paddle::operators::math::Pool3dDirectCUDAFunctor<
paddle::operators::math::AvgPool<float>, float>
phi::funcs::AvgPool<float> pool_process;
phi::funcs::Pool3dDirectCUDAFunctor<phi::funcs::AvgPool<float>, float>
pool3d_forward;
pool3d_forward(idata, input_shape, output_shape, ksize_, strides_,
paddings_, true, adaptive_, odatas[0], stream, pool_process);
......@@ -351,16 +349,14 @@ int Pool3DPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc,
}
if (pool3d_type_ == "max") {
paddle::operators::math::MaxPool<float> pool_process;
paddle::operators::math::Pool3dDirectCUDAFunctor<
paddle::operators::math::MaxPool<float>, float>
phi::funcs::MaxPool<float> pool_process;
phi::funcs::Pool3dDirectCUDAFunctor<phi::funcs::MaxPool<float>, float>
pool3d_forward;
pool3d_forward(input, input_shape, output_shape, ksize, strides_, paddings,
true, adaptive_, output, stream, pool_process);
} else if (pool3d_type_ == "avg") {
paddle::operators::math::AvgPool<float> pool_process;
paddle::operators::math::Pool3dDirectCUDAFunctor<
paddle::operators::math::AvgPool<float>, float>
phi::funcs::AvgPool<float> pool_process;
phi::funcs::Pool3dDirectCUDAFunctor<phi::funcs::AvgPool<float>, float>
pool3d_forward;
pool3d_forward(input, input_shape, output_shape, ksize, strides_, paddings,
true, adaptive_, output, stream, pool_process);
......
......@@ -13,7 +13,7 @@
// limitations under the License.
#include "paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.h"
#include "paddle/fluid/operators/math/pooling.h"
#include "paddle/phi/kernels/funcs/pooling.h"
namespace paddle {
namespace inference {
......@@ -84,16 +84,14 @@ int PoolPlugin::enqueue(int batchSize, const void *const *inputs,
output_shape.insert(output_shape.begin(), batchSize);
if (pool_type_ == PoolType::max) {
paddle::operators::math::MaxPool<float> pool_process;
paddle::operators::math::Pool2dDirectCUDAFunctor<
paddle::operators::math::MaxPool<float>, float>
phi::funcs::MaxPool<float> pool_process;
phi::funcs::Pool2dDirectCUDAFunctor<phi::funcs::MaxPool<float>, float>
pool2d_forward;
pool2d_forward(idata, input_shape, output_shape, ksize_, strides_,
paddings_, true, false, odatas[0], stream, pool_process);
} else if (pool_type_ == PoolType::avg) {
paddle::operators::math::AvgPool<float> pool_process;
paddle::operators::math::Pool2dDirectCUDAFunctor<
paddle::operators::math::AvgPool<float>, float>
phi::funcs::AvgPool<float> pool_process;
phi::funcs::Pool2dDirectCUDAFunctor<phi::funcs::AvgPool<float>, float>
pool2d_forward;
pool2d_forward(idata, input_shape, output_shape, ksize_, strides_,
paddings_, exclusive_, adaptive_, odatas[0], stream,
......@@ -292,16 +290,14 @@ int PoolPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc,
}
if (pool_type_ == "max") {
paddle::operators::math::MaxPool<float> pool_process;
paddle::operators::math::Pool2dDirectCUDAFunctor<
paddle::operators::math::MaxPool<float>, float>
phi::funcs::MaxPool<float> pool_process;
phi::funcs::Pool2dDirectCUDAFunctor<phi::funcs::MaxPool<float>, float>
pool2d_forward;
pool2d_forward(input, input_shape, output_shape, ksize, strides_, paddings,
true, false, output, stream, pool_process);
} else if (pool_type_ == "avg") {
paddle::operators::math::AvgPool<float> pool_process;
paddle::operators::math::Pool2dDirectCUDAFunctor<
paddle::operators::math::AvgPool<float>, float>
phi::funcs::AvgPool<float> pool_process;
phi::funcs::Pool2dDirectCUDAFunctor<phi::funcs::AvgPool<float>, float>
pool2d_forward;
pool2d_forward(input, input_shape, output_shape, ksize, strides_, paddings,
exclusive_, adaptive_, output, stream, pool_process);
......
......@@ -16,7 +16,6 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/phi_utils.h"
#include "paddle/fluid/operators/math/pooling.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/flatten_grad_kernel.h"
......
......@@ -20,7 +20,6 @@ math_library(sampler DEPS generator)
# math_library(math_function DEPS blas dense_tensor tensor)
math_library(maxouting)
math_library(pooling)
if(WITH_MKLDNN)
math_library(selected_rows_functor DEPS selected_rows_utils math_function blas mkldnn_axpy_handler)
......
......@@ -12,14 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/pool_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/phi/kernels/funcs/pooling.h"
namespace paddle {
namespace operators {
using framework::DataLayout;
using framework::Tensor;
using dnnl::memory;
using dnnl::pooling_backward;
using dnnl::pooling_forward;
......@@ -83,11 +85,11 @@ class PoolingMKLDNNHandler
phi::slice_ddim(input_dims, 2, input_dims.size());
if (global_pooling) {
operators::UpdateKsize(&ksize, data_dims);
phi::funcs::UpdateKernelSize(&ksize, data_dims);
}
operators::UpdatePadding(&paddings, global_pooling, 0, padding_algorithm,
data_dims, strides, ksize);
phi::funcs::UpdatePadding(&paddings, global_pooling, 0, padding_algorithm,
data_dims, strides, ksize);
const auto src_tz = phi::vectorize(input->dims());
const auto dst_tz = phi::vectorize(output->dims());
......@@ -173,11 +175,11 @@ class PoolingMKLDNNHandler
framework::DDim data_dims = phi::slice_ddim(in_x_dims, 2, in_x_dims.size());
if (global_pooling) {
operators::UpdateKsize(&ksize, data_dims);
phi::funcs::UpdateKernelSize(&ksize, data_dims);
}
operators::UpdatePadding(&paddings, global_pooling, 0, padding_algorithm,
data_dims, strides, ksize);
phi::funcs::UpdatePadding(&paddings, global_pooling, 0, padding_algorithm,
data_dims, strides, ksize);
auto src_tz = phi::vectorize<int64_t>(in_x->dims());
auto diff_src_tz = phi::vectorize<int64_t>(in_x_grad->dims());
......
......@@ -26,13 +26,14 @@
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/core/kernel_registry.h"
USE_OP(pool2d);
USE_OP_ITSELF(pool2d);
USE_OP_DEVICE_KERNEL(pool2d, MKLDNN);
USE_OP_ITSELF(relu);
USE_OP_DEVICE_KERNEL(relu, MKLDNN);
USE_OP_ITSELF(transpose);
USE_OP_DEVICE_KERNEL(transpose, MKLDNN);
PD_DECLARE_KERNEL(pool2d, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(relu, CPU, ALL_LAYOUT);
namespace paddle {
......
This diff is collapsed.
......@@ -15,6 +15,12 @@ limitations under the License. */
#include "paddle/fluid/operators/pool_op.h"
#include <unordered_map>
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/infermeta/backward.h"
#include "paddle/phi/infermeta/unary.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
......@@ -23,125 +29,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
int PoolOutputSize(int input_size, int filter_size, int padding_1,
int padding_2, int stride, bool ceil_mode) {
int output_size;
if (!ceil_mode) {
output_size =
(input_size - filter_size + padding_1 + padding_2) / stride + 1;
} else {
output_size =
(input_size - filter_size + padding_1 + padding_2 + stride - 1) /
stride +
1;
}
PADDLE_ENFORCE_GT(
output_size, 0,
platform::errors::InvalidArgument(
"the output size must be greater than 0. But received: "
"output_size = %d due to the settings of input_size(%d), "
"padding(%d,%d), "
"k_size(%d) and stride(%d). Please check again!",
output_size, input_size, padding_1, padding_2, filter_size, stride));
return output_size;
}
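For intuition, the two branches above differ only in whether the division rounds down or up. A standalone re-check of the arithmetic (a hypothetical helper mirroring PoolOutputSize, outside the framework):

#include <cassert>

// Re-implementation of the output-size arithmetic above, without the
// framework enforce (illustrative only).
int PoolOutputSizeRef(int input_size, int filter_size, int padding_1,
                      int padding_2, int stride, bool ceil_mode) {
  int numerator = input_size - filter_size + padding_1 + padding_2;
  return ceil_mode ? (numerator + stride - 1) / stride + 1
                   : numerator / stride + 1;
}

int main() {
  // input=7, kernel=2, stride=2, no padding: floor mode drops the last
  // partial window (3 outputs); ceil mode keeps it (4 outputs).
  assert(PoolOutputSizeRef(7, 2, 0, 0, 2, /*ceil_mode=*/false) == 3);
  assert(PoolOutputSizeRef(7, 2, 0, 0, 2, /*ceil_mode=*/true) == 4);
  return 0;
}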
void PoolOp::InferShape(framework::InferShapeContext* ctx) const {
PADDLE_ENFORCE_EQ(
ctx->HasInput("X"), true,
platform::errors::NotFound("Input(X) of Pool operator is not found."));
PADDLE_ENFORCE_EQ(
ctx->HasOutput("Out"), true,
platform::errors::NotFound("Output(Out) of Pool operator is not found."));
std::string pooling_type = ctx->Attrs().Get<std::string>("pooling_type");
std::vector<int> ksize = ctx->Attrs().Get<std::vector<int>>("ksize");
std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
bool ceil_mode = ctx->Attrs().Get<bool>("ceil_mode");
bool adaptive = ctx->Attrs().Get<bool>("adaptive");
bool global_pooling = ctx->Attrs().Get<bool>("global_pooling");
std::string data_format = ctx->Attrs().Get<std::string>("data_format");
std::string padding_algorithm =
ctx->Attrs().Get<std::string>("padding_algorithm");
auto in_x_dims = ctx->GetInputDim("X");
PADDLE_ENFORCE_EQ(
in_x_dims.size() == 4 || in_x_dims.size() == 5, true,
platform::errors::InvalidArgument(
"the input of Op(pool) should be 4-D or 5-D Tensor. But "
"received: %u-D Tensor and it's shape is [%s].",
in_x_dims.size(), in_x_dims));
PADDLE_ENFORCE_EQ(
in_x_dims.size() - ksize.size(), 2U,
platform::errors::InvalidArgument(
"the dimension of input minus the size of "
"Attr(ksize) must be euqal to 2 in Op(pool). "
"But received: the dimension of input minus the size "
"of Attr(ksize) is %d, the "
"input's dimension is %d, the shape of input "
"is [%s], the Attr(ksize)'s size is %d, the Attr(ksize) is [%s].",
in_x_dims.size() - ksize.size(), in_x_dims.size(), in_x_dims,
ksize.size(), phi::make_ddim(ksize)));
PADDLE_ENFORCE_EQ(
ksize.size(), strides.size(),
platform::errors::InvalidArgument(
"the size of Attr(ksize) and Attr(strides) in "
"Op(pool) must be equal. "
"But received: Attr(ksize)'s size is %d, Attr(strides)'s "
"size is %d, Attr(ksize) is [%s], Attr(strides)is [%s].",
ksize.size(), strides.size(), phi::make_ddim(ksize),
phi::make_ddim(strides)));
// MKL-DNN kernels use the NCHW order of dims description,
// so we ignore the data_format consideration for the MKL-DNN kernel
const bool channel_last = (ctx->IsRunMKLDNNKernel() == false) &&
(data_format == "NHWC" || data_format == "NDHWC");
// update paddings if "SAME" or global_pooling
framework::DDim data_dims;
if (channel_last) {
data_dims = phi::slice_ddim(in_x_dims, 1, in_x_dims.size() - 1);
} else {
data_dims = phi::slice_ddim(in_x_dims, 2, in_x_dims.size());
}
UpdatePadding(&paddings, global_pooling, adaptive, padding_algorithm,
data_dims, strides, ksize);
if (global_pooling) {
UpdateKsize(&ksize, data_dims);
}
std::vector<int64_t> output_shape;
if (adaptive) {
output_shape.insert(output_shape.end(), ksize.begin(), ksize.end());
} else {
for (int i = 0; i < data_dims.size(); ++i) {
if ((!ctx->IsRuntime()) && (data_dims[i] < 0)) {
output_shape.push_back(data_dims[i]);
} else {
output_shape.push_back(
PoolOutputSize(data_dims[i], ksize[i], paddings[2 * i],
paddings[2 * i + 1], strides[i], ceil_mode));
}
}
}
// output_N = input_N
output_shape.insert(output_shape.begin(), in_x_dims[0]);
// output_C = input_C
if (channel_last) {
output_shape.push_back(in_x_dims[in_x_dims.size() - 1]);
} else {
output_shape.insert(output_shape.begin() + 1, in_x_dims[1]);
}
ctx->SetOutputDim("Out", phi::make_ddim(output_shape));
ctx->ShareLoD("X", "Out");
}
bool CanMKLDNNSupportPool(const framework::ExecutionContext& ctx) {
if (ctx.Attr<bool>("adaptive") == false) return true;
// (jczaja): oneDNN supports only pool windows of unchangeable size
......@@ -216,16 +103,6 @@ framework::OpKernelType PoolOp::GetKernelTypeForVar(
tensor.place(), tensor.layout());
}
void PoolOpGrad::InferShape(framework::InferShapeContext* ctx) const {
PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true,
platform::errors::NotFound(
"Input(X) of Pool Gradoperator is not found."));
PADDLE_ENFORCE_EQ(ctx->HasOutput(framework::GradVarName("X")), true,
platform::errors::NotFound(
"Input(X@GRAD) of Pool Gradoperator is not found."));
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
}
framework::OpKernelType PoolOpGrad::GetExpectedKernelType(
const framework::ExecutionContext& ctx) const {
framework::LibraryType library_{framework::LibraryType::kPlain};
......@@ -471,7 +348,7 @@ class Pool2dOpGradGradMaker : public framework::SingleGradOpMaker<T> {
protected:
void Apply(GradOpPtr<T> grad_op) const override {
grad_op->SetType("pool2d_grad_grad");
grad_op->SetType("pool2d_double_grad");
grad_op->SetInput("X", this->OutputGrad(framework::GradVarName("X")));
grad_op->SetOutput("Out", this->InputGrad(framework::GradVarName("Out")));
grad_op->SetAttrMap(this->Attrs());
......@@ -692,35 +569,34 @@ Example:
namespace ops = paddle::operators;
DECLARE_INFER_SHAPE_FUNCTOR(pool2d, Pool2dInferShapeFunctor,
PD_INFER_META(phi::PoolInferMeta));
DECLARE_INFER_SHAPE_FUNCTOR(pool2d_grad, Pool2dGradInferShapeFunctor,
PD_INFER_META(phi::PoolGradInferMeta));
DECLARE_INFER_SHAPE_FUNCTOR(pool2d_double_grad,
Pool2dDoubleGradInferShapeFunctor,
PD_INFER_META(phi::PoolInferMeta));
REGISTER_OPERATOR(
pool2d, ops::PoolOp, ops::Pool2dOpMaker, ops::PoolOpInferVarType,
paddle::framework::DefaultGradOpMaker<paddle::framework::OpDesc, true>,
paddle::framework::DefaultGradOpMaker<paddle::imperative::OpBase, true>);
paddle::framework::DefaultGradOpMaker<paddle::imperative::OpBase, true>,
Pool2dInferShapeFunctor);
REGISTER_OPERATOR(pool2d_grad, ops::PoolOpGrad,
ops::Pool2dOpGradGradMaker<paddle::framework::OpDesc>,
ops::Pool2dOpGradGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(pool2d_grad_grad, ops::PoolOp);
REGISTER_OP_CPU_KERNEL(
pool2d, ops::PoolKernel<paddle::platform::CPUDeviceContext, float>,
ops::PoolKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
pool2d_grad, ops::PoolGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::PoolGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
pool2d_grad_grad,
ops::PoolGradGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::PoolGradGradKernel<paddle::platform::CPUDeviceContext, double>);
ops::Pool2dOpGradGradMaker<paddle::imperative::OpBase>,
Pool2dGradInferShapeFunctor);
REGISTER_OPERATOR(pool2d_double_grad, ops::PoolOp,
Pool2dDoubleGradInferShapeFunctor);
DECLARE_INFER_SHAPE_FUNCTOR(pool3d, Pool3dInferShapeFunctor,
PD_INFER_META(phi::PoolInferMeta));
DECLARE_INFER_SHAPE_FUNCTOR(pool3d_grad, Pool3dGradInferShapeFunctor,
PD_INFER_META(phi::PoolGradInferMeta));
REGISTER_OPERATOR(
pool3d, ops::PoolOp, ops::Pool3dOpMaker, ops::PoolOpInferVarType,
paddle::framework::DefaultGradOpMaker<paddle::framework::OpDesc, true>,
paddle::framework::DefaultGradOpMaker<paddle::imperative::OpBase, true>);
REGISTER_OPERATOR(pool3d_grad, ops::PoolOpGrad);
REGISTER_OP_CPU_KERNEL(
pool3d, ops::PoolKernel<paddle::platform::CPUDeviceContext, float>,
ops::PoolKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
pool3d_grad, ops::PoolGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::PoolGradKernel<paddle::platform::CPUDeviceContext, double>);
paddle::framework::DefaultGradOpMaker<paddle::imperative::OpBase, true>,
Pool3dInferShapeFunctor);
REGISTER_OPERATOR(pool3d_grad, ops::PoolOpGrad, Pool3dGradInferShapeFunctor);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/pool_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
pool2d, ops::PoolKernel<paddle::platform::CUDADeviceContext, float>,
ops::PoolKernel<paddle::platform::CUDADeviceContext, double>,
ops::PoolKernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>);
REGISTER_OP_CUDA_KERNEL(
pool2d_grad,
ops::PoolGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::PoolGradKernel<paddle::platform::CUDADeviceContext, double>,
ops::PoolGradKernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>);
REGISTER_OP_CUDA_KERNEL(
pool2d_grad_grad,
ops::PoolGradGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::PoolGradGradKernel<paddle::platform::CUDADeviceContext, double>,
ops::PoolGradGradKernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>);
REGISTER_OP_CUDA_KERNEL(
pool3d, ops::PoolKernel<paddle::platform::CUDADeviceContext, float>,
ops::PoolKernel<paddle::platform::CUDADeviceContext, double>,
ops::PoolKernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>);
REGISTER_OP_CUDA_KERNEL(
pool3d_grad,
ops::PoolGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::PoolGradKernel<paddle::platform::CUDADeviceContext, double>,
ops::PoolGradKernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>);
......@@ -12,19 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
// NOTE(Ruibiao): Difficult to remove code from this header file because too
// many files rely on it through "mkldnn_reuse.h"
#include <algorithm>
#include <string>
#include <vector>
#pragma once
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/pooling.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#if defined(__HIPCC__) || defined(__NVCC__)
#include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h"
#endif
namespace paddle {
namespace operators {
......@@ -35,8 +28,6 @@ class PoolOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override;
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override;
......@@ -50,8 +41,6 @@ class PoolOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override;
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override;
......@@ -71,292 +60,5 @@ class Pool3dOpMaker : public framework::OpProtoAndCheckerMaker {
void Make() override;
};
template <typename T = int>
inline void UpdatePadding(std::vector<T>* paddings, const bool global_pooling,
const bool adaptive,
const std::string padding_algorithm,
const framework::DDim data_dims,
const std::vector<T>& strides,
const std::vector<T>& ksize) {
// set padding size == data_dims.size() * 2
auto data_shape = phi::vectorize<T>(data_dims);
if (static_cast<int>(paddings->size()) == data_dims.size()) {
for (int i = 0; i < data_dims.size(); ++i) {
T copy_pad = *(paddings->begin() + 2 * i);
paddings->insert(paddings->begin() + 2 * i + 1, copy_pad);
}
} else {
PADDLE_ENFORCE_EQ(data_dims.size() * 2, paddings->size(),
platform::errors::InvalidArgument(
"Paddings size %d should be the same or twice as the "
"pooling size %d.",
paddings->size(), data_dims.size() * 2));
}
// when padding_algorithm is "VALID" or "SAME"
if (padding_algorithm == "SAME") {
for (int i = 0; i < data_dims.size(); ++i) {
T out_size = (data_dims[i] + strides[i] - 1) / strides[i];
T pad_sum =
std::max((out_size - 1) * strides[i] + ksize[i] - data_shape[i],
static_cast<T>(0));
T pad_0 = pad_sum / 2;
T pad_1 = pad_sum - pad_0;
*(paddings->begin() + i * 2) = pad_0;
*(paddings->begin() + i * 2 + 1) = pad_1;
}
} else if (padding_algorithm == "VALID") {
for (auto it = paddings->begin(); it != paddings->end(); it++) {
*it = 0;
}
}
// if global_pooling == true or adaptive == true, padding will be ignored
if (global_pooling || adaptive) {
for (auto it = paddings->begin(); it != paddings->end(); it++) {
*it = 0;
}
}
}
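The "SAME" branch picks the total padding so that the output size equals ceil(input / stride), splitting any odd remainder toward the trailing side. A standalone sketch of that computation (hypothetical helper, same arithmetic as above):

#include <algorithm>
#include <cassert>

// Total "SAME" padding: out_size = ceil(in / stride), and the window
// positions must cover the input, as in the branch above (illustrative).
int SamePadSum(int in_size, int stride, int ksize) {
  int out_size = (in_size + stride - 1) / stride;
  return std::max((out_size - 1) * stride + ksize - in_size, 0);
}

int main() {
  // in=13, stride=2, k=3: out=7, pad_sum = (7-1)*2 + 3 - 13 = 2,
  // so pad_0 = 1 and pad_1 = 1.
  assert(SamePadSum(13, 2, 3) == 2);
  return 0;
}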
template <typename T = int>
inline void UpdateKsize(std::vector<T>* ksize,
const framework::DDim data_dims) {
ksize->resize(static_cast<size_t>(data_dims.size()));
for (size_t i = 0; i < ksize->size(); ++i) {
*(ksize->begin() + i) = static_cast<T>(data_dims[i]);
}
}
inline int getReduceNum(const framework::Tensor& input,
const framework::Tensor* output,
const std::string data_format,
std::vector<int>* reduce_dim) {
// data_format can only be NCHW here
bool channel_last = (data_format == "NHWC");
if (channel_last) {
return 0;
}
int reduce_num = 0;
const int output_height = output->dims()[2];
const int output_width = output->dims()[3];
if ((output_height == 1) && (output_width == 1)) {
reduce_dim->push_back(2);
reduce_dim->push_back(3);
reduce_num = input.dims()[2] * input.dims()[3];
}
return reduce_num;
}
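getReduceNum only fires for NCHW inputs whose output collapses to 1x1 (global average pooling); in that case the whole H*W plane is reduced over dims {2, 3}. A standalone sketch under those assumptions:

#include <cassert>
#include <vector>

// Mirror of getReduceNum above for plain NCHW shape vectors (illustrative).
int GetReduceNumRef(const std::vector<int>& in_dims,   // {N, C, H, W}
                    const std::vector<int>& out_dims,  // {N, C, H', W'}
                    std::vector<int>* reduce_dim) {
  if (out_dims[2] == 1 && out_dims[3] == 1) {
    reduce_dim->assign({2, 3});
    return in_dims[2] * in_dims[3];  // H * W elements per reduction
  }
  return 0;
}

int main() {
  std::vector<int> reduce_dim;
  // A 14x14 plane pooled to 1x1: 196 elements reduced over dims {2, 3}.
  assert(GetReduceNumRef({8, 64, 14, 14}, {8, 64, 1, 1}, &reduce_dim) == 196);
  return 0;
}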
template <typename DeviceContext, typename T>
class PoolKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const Tensor* in_x = context.Input<Tensor>("X");
Tensor* out = context.Output<Tensor>("Out");
std::string pooling_type = context.Attr<std::string>("pooling_type");
std::vector<int> ksize = context.Attr<std::vector<int>>("ksize");
std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
std::string data_format = context.Attr<std::string>("data_format");
bool exclusive = context.Attr<bool>("exclusive");
bool adaptive = context.Attr<bool>("adaptive");
bool global_pooling = context.Attr<bool>("global_pooling");
std::string padding_algorithm =
context.Attr<std::string>("padding_algorithm");
const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC");
// update paddings
auto in_x_dims = in_x->dims();
framework::DDim data_dims;
if (channel_last) {
data_dims = phi::slice_ddim(in_x_dims, 1, in_x_dims.size() - 1);
} else {
data_dims = phi::slice_ddim(in_x_dims, 2, in_x_dims.size());
}
UpdatePadding(&paddings, global_pooling, adaptive, padding_algorithm,
data_dims, strides, ksize);
if (data_dims.size() * 2 == static_cast<int>(paddings.size())) {
for (int i = 0; i < data_dims.size(); ++i) {
paddings.erase(paddings.begin() + i + 1);
}
}
if (global_pooling) {
UpdateKsize(&ksize, data_dims);
}
auto& dev_ctx = context.template device_context<DeviceContext>();
switch (ksize.size()) {
case 2: {
if (pooling_type == "max") {
paddle::operators::math::Pool2dFunctor<
DeviceContext, paddle::operators::math::MaxPool<T>, T>
pool2d_forward;
paddle::operators::math::MaxPool<T> pool_process;
pool2d_forward(dev_ctx, *in_x, ksize, strides, paddings, data_format,
true, false, out, pool_process);
} else if (pooling_type == "avg") {
std::vector<int> reduce_dim;
int reduce_num = getReduceNum(*in_x, out, data_format, &reduce_dim);
if (reduce_num > 0 &&
adaptive) { // for adaptive_avg_pool2d && output_size == 1
#if defined(__HIPCC__) || defined(__NVCC__)
auto stream = dev_ctx.stream();
TensorReduceImpl<T, T, kps::AddFunctor, kps::DivideFunctor<T>>(
dev_ctx, *in_x, out, kps::DivideFunctor<T>(reduce_num),
reduce_dim, stream);
#else // for cpu
paddle::operators::math::Pool2dFunctor<
DeviceContext, paddle::operators::math::AvgPool<T>, T>
pool2d_forward;
paddle::operators::math::AvgPool<T> pool_process;
pool2d_forward(dev_ctx, *in_x, ksize, strides, paddings,
data_format, exclusive, adaptive, out, pool_process);
#endif
} else { // avgpool_2d or adaptive_avg_pool2d && output_size != 1
paddle::operators::math::Pool2dFunctor<
DeviceContext, paddle::operators::math::AvgPool<T>, T>
pool2d_forward;
paddle::operators::math::AvgPool<T> pool_process;
pool2d_forward(dev_ctx, *in_x, ksize, strides, paddings,
data_format, exclusive, adaptive, out, pool_process);
}
}
} break;
case 3: {
if (pooling_type == "max") {
paddle::operators::math::Pool3dFunctor<
DeviceContext, paddle::operators::math::MaxPool<T>, T>
pool3d_forward;
paddle::operators::math::MaxPool<T> pool_process;
pool3d_forward(dev_ctx, *in_x, ksize, strides, paddings, data_format,
true, false, out, pool_process);
} else if (pooling_type == "avg") {
paddle::operators::math::Pool3dFunctor<
DeviceContext, paddle::operators::math::AvgPool<T>, T>
pool3d_forward;
paddle::operators::math::AvgPool<T> pool_process;
pool3d_forward(dev_ctx, *in_x, ksize, strides, paddings, data_format,
exclusive, adaptive, out, pool_process);
}
} break;
default: {
PADDLE_THROW(platform::errors::InvalidArgument(
"Pool op only supports 2D and 3D input."));
}
}
}
};
template <typename DeviceContext, typename T>
class PoolGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const Tensor* in_x = context.Input<Tensor>("X");
const Tensor* out = context.Input<Tensor>("Out");
const Tensor* out_grad =
context.Input<Tensor>(framework::GradVarName("Out"));
Tensor* in_x_grad = context.Output<Tensor>(framework::GradVarName("X"));
std::string pooling_type = context.Attr<std::string>("pooling_type");
std::vector<int> ksize = context.Attr<std::vector<int>>("ksize");
std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
bool exclusive = context.Attr<bool>("exclusive");
bool adaptive = context.Attr<bool>("adaptive");
std::string data_format = context.Attr<std::string>("data_format");
bool global_pooling = context.Attr<bool>("global_pooling");
std::string padding_algorithm =
context.Attr<std::string>("padding_algorithm");
const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC");
// update paddings
auto in_x_dims = in_x->dims();
framework::DDim data_dims;
if (channel_last) {
data_dims = phi::slice_ddim(in_x_dims, 1, in_x_dims.size() - 1);
} else {
data_dims = phi::slice_ddim(in_x_dims, 2, in_x_dims.size());
}
UpdatePadding(&paddings, global_pooling, adaptive, padding_algorithm,
data_dims, strides, ksize);
if (data_dims.size() * 2 == static_cast<int>(paddings.size())) {
for (int i = 0; i < data_dims.size(); ++i) {
paddings.erase(paddings.begin() + i + 1);
}
}
if (global_pooling) {
UpdateKsize(&ksize, data_dims);
}
auto& dev_ctx = context.template device_context<DeviceContext>();
if (in_x_grad) {
in_x_grad->mutable_data<T>(context.GetPlace());
phi::funcs::SetConstant<DeviceContext, T> set_constant;
set_constant(dev_ctx, in_x_grad, static_cast<T>(0.0));
switch (ksize.size()) {
case 2: {
if (pooling_type == "max") {
paddle::operators::math::MaxPool2dGradFunctor<DeviceContext, T>
pool2d_backward;
pool2d_backward(dev_ctx, *in_x, *out, *out_grad, ksize, strides,
paddings, data_format, in_x_grad);
} else if (pooling_type == "avg") {
paddle::operators::math::Pool2dGradFunctor<
DeviceContext, paddle::operators::math::AvgPoolGrad<T>, T>
pool2d_backward;
paddle::operators::math::AvgPoolGrad<T> pool_process;
pool2d_backward(dev_ctx, *in_x, *out, *out_grad, ksize, strides,
paddings, data_format, exclusive, adaptive,
in_x_grad, pool_process);
}
} break;
case 3: {
if (pooling_type == "max") {
paddle::operators::math::MaxPool3dGradFunctor<DeviceContext, T>
pool3d_backward;
pool3d_backward(dev_ctx, *in_x, *out, *out_grad, ksize, strides,
paddings, data_format, in_x_grad);
} else if (pooling_type == "avg") {
paddle::operators::math::Pool3dGradFunctor<
DeviceContext, paddle::operators::math::AvgPoolGrad<T>, T>
pool3d_backward;
paddle::operators::math::AvgPoolGrad<T> pool_process;
pool3d_backward(dev_ctx, *in_x, *out, *out_grad, ksize, strides,
paddings, data_format, exclusive, adaptive,
in_x_grad, pool_process);
}
} break;
default: {
PADDLE_THROW(platform::errors::InvalidArgument(
"Pool op only supports 2D and 3D input."));
}
}
}
}
};
template <typename DeviceContext, typename T>
class PoolGradGradKernel : public PoolKernel<DeviceContext, T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
std::string pooling_type = context.Attr<std::string>("pooling_type");
if (pooling_type == "max") {
PADDLE_THROW(platform::errors::InvalidArgument(
"Pool op grad grad only supports avgpool."));
} else {
PoolKernel<DeviceContext, T>::Compute(context);
}
}
};
} // namespace operators
} // namespace paddle
......@@ -12,8 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/pool_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
#include "paddle/phi/kernels/funcs/pooling.h"
namespace paddle {
namespace operators {
......@@ -80,10 +81,10 @@ class MLUPoolOpKernel : public framework::OpKernel<T> {
data_dims = phi::slice_ddim(in_x_dims, 1, in_x_dims.size() - 1);
}
UpdatePadding(&paddings, global_pooling, adaptive, padding_algorithm,
data_dims, strides, ksize);
phi::funcs::UpdatePadding(&paddings, global_pooling, adaptive,
padding_algorithm, data_dims, strides, ksize);
if (global_pooling) {
UpdateKsize(&ksize, data_dims);
phi::funcs::UpdateKernelSize(&ksize, data_dims);
}
MLUCnnlTensorDesc in_x_desc(*in_x, cnnl_layout, ToCnnlDataType<T>());
......@@ -191,10 +192,10 @@ class MLUPoolGradOpKernel : public framework::OpKernel<T> {
data_dims = phi::slice_ddim(in_x_dims, 1, in_x_dims.size() - 1);
}
UpdatePadding(&paddings, global_pooling, adaptive, padding_algorithm,
data_dims, strides, ksize);
phi::funcs::UpdatePadding(&paddings, global_pooling, adaptive,
padding_algorithm, data_dims, strides, ksize);
if (global_pooling) {
UpdateKsize(&ksize, data_dims);
phi::funcs::UpdateKernelSize(&ksize, data_dims);
}
// inputs need to be in NHWC layout
......
......@@ -11,8 +11,10 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/pool_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
#include "paddle/phi/kernels/funcs/pooling.h"
namespace paddle {
namespace operators {
......@@ -68,8 +70,8 @@ class NPUPoolOpKernel : public framework::OpKernel<T> {
strides_vec[2] = strides[0];
strides_vec[3] = strides[1];
}
UpdatePadding(&paddings, global_pooling, adaptive, padding_algorithm,
data_dims, strides, ksize);
phi::funcs::UpdatePadding(&paddings, global_pooling, adaptive,
padding_algorithm, data_dims, strides, ksize);
PADDLE_ENFORCE_LT(
std::max(paddings[0], paddings[1]), ksize[0],
platform::errors::InvalidArgument(
......@@ -201,8 +203,8 @@ class NPUPoolGradOpKernel : public framework::OpKernel<T> {
strides_vec[2] = strides[0];
strides_vec[3] = strides[1];
}
UpdatePadding(&paddings, global_pooling, adaptive, padding_algorithm,
data_dims, strides, ksize);
phi::funcs::UpdatePadding(&paddings, global_pooling, adaptive,
padding_algorithm, data_dims, strides, ksize);
PADDLE_ENFORCE_LT(
std::max(paddings[0], paddings[1]), ksize[0],
......
......@@ -8,13 +8,17 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/pool_op.h"
#include <unordered_map>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor.h"
#ifdef PADDLE_WITH_XPU
namespace paddle {
namespace operators {
using framework::Tensor;
xpu::Pooling_t XPUPoolingType(const std::string& pooltype, bool exclusive,
bool is_test) {
if (pooltype == "max") {
......
......@@ -12,8 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/pool_with_index_op.h"
#include <memory>
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/infermeta/backward.h"
#include "paddle/phi/infermeta/unary.h"
namespace paddle {
namespace operators {
......@@ -28,71 +32,6 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true,
platform::errors::InvalidArgument(
"Input(X) of Pooling should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true,
platform::errors::InvalidArgument(
"Output(Out) of Pooling should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasOutput("Mask"), true,
platform::errors::InvalidArgument(
"Output(Mask) of Pooling should not be null."));
auto in_x_dims = ctx->GetInputDim("X");
std::vector<int> ksize = ctx->Attrs().Get<std::vector<int>>("ksize");
std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
bool adaptive = ctx->Attrs().Get<bool>("adaptive");
PADDLE_ENFORCE(
in_x_dims.size() == 4 || in_x_dims.size() == 5,
platform::errors::InvalidArgument("Pooling intput should be 4-D or 5-D "
"tensor but received %dD-Tensor",
in_x_dims.size()));
if (ctx->Attrs().Get<bool>("global_pooling")) {
ksize.resize(static_cast<size_t>(in_x_dims.size()) - 2);
for (size_t i = 0; i < ksize.size(); ++i) {
paddings[i] = 0;
ksize[i] = static_cast<int>(in_x_dims[i + 2]);
}
}
PADDLE_ENFORCE_EQ(
in_x_dims.size() - ksize.size(), 2U,
platform::errors::InvalidArgument(
"The input size %d minus the kernel size %d should equal to 2.",
in_x_dims.size(), ksize.size()));
PADDLE_ENFORCE_EQ(
ksize.size(), strides.size(),
platform::errors::InvalidArgument(
"Strides size %d and pooling size %d should be the same.",
strides.size(), ksize.size()));
PADDLE_ENFORCE_EQ(
ksize.size(), paddings.size(),
platform::errors::InvalidArgument(
"Paddings size %d and pooling size %d should be the same.",
paddings.size(), ksize.size()));
std::vector<int64_t> output_shape({in_x_dims[0], in_x_dims[1]});
if (adaptive) {
output_shape.insert(output_shape.end(), ksize.begin(), ksize.end());
} else {
for (size_t i = 0; i < ksize.size(); ++i) {
if ((!ctx->IsRuntime()) && (in_x_dims[i + 2] < 0)) {
output_shape.push_back(in_x_dims[i + 2]);
} else {
output_shape.push_back(MaxPoolOutputSize(in_x_dims[i + 2], ksize[i],
paddings[i], strides[i]));
}
}
}
ctx->SetOutputDim("Out", phi::make_ddim(output_shape));
ctx->SetOutputDim("Mask", phi::make_ddim(output_shape));
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
......@@ -106,22 +45,6 @@ class MaxPoolWithIndexOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE_EQ(
ctx->HasInput("Mask"), true,
platform::errors::InvalidArgument("Input(Mask) must not be null."));
PADDLE_ENFORCE_EQ(
ctx->HasInput("X"), true,
platform::errors::InvalidArgument("Input(X) must not be null."));
PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), true,
platform::errors::InvalidArgument(
"Input(Out@GRAD) should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasOutput(framework::GradVarName("X")), true,
platform::errors::InvalidArgument(
"Output(X@GRAD) should not be null."));
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
......@@ -335,40 +258,34 @@ DECLARE_NO_NEED_BUFFER_VARS_INFERER(
namespace ops = paddle::operators;
DECLARE_INFER_SHAPE_FUNCTOR(max_pool2d_with_index,
MaxPool2dWithIndexInferShapeFunctor,
PD_INFER_META(phi::MaxPoolWithIndexInferMeta));
DECLARE_INFER_SHAPE_FUNCTOR(max_pool2d_with_index_grad,
MaxPool2dWithIndexGradInferShapeFunctor,
PD_INFER_META(phi::MaxPoolWithIndexGradInferMeta));
REGISTER_OPERATOR(max_pool2d_with_index, ops::MaxPoolWithIndexOp,
ops::MaxPool2dWithIndexOpMaker,
ops::MaxPoolWithIndexGradOpMaker<paddle::framework::OpDesc>,
ops::MaxPoolWithIndexGradOpMaker<paddle::imperative::OpBase>);
ops::MaxPoolWithIndexGradOpMaker<paddle::imperative::OpBase>,
MaxPool2dWithIndexInferShapeFunctor);
REGISTER_OPERATOR(max_pool2d_with_index_grad, ops::MaxPoolWithIndexOpGrad,
ops::MaxPoolWithIndexOpGradNoNeedBufferVarsInferer);
ops::MaxPoolWithIndexOpGradNoNeedBufferVarsInferer,
MaxPool2dWithIndexGradInferShapeFunctor);
REGISTER_OP_CPU_KERNEL(
max_pool2d_with_index,
ops::MaxPoolWithIndexKernel<paddle::platform::CPUDeviceContext, float, int>,
ops::MaxPoolWithIndexKernel<paddle::platform::CPUDeviceContext, double,
int>);
REGISTER_OP_CPU_KERNEL(
max_pool2d_with_index_grad,
ops::MaxPoolWithIndexGradKernel<paddle::platform::CPUDeviceContext, float,
int>,
ops::MaxPoolWithIndexGradKernel<paddle::platform::CPUDeviceContext, double,
int>);
DECLARE_INFER_SHAPE_FUNCTOR(max_pool3d_with_index,
MaxPool3dWithIndexInferShapeFunctor,
PD_INFER_META(phi::MaxPoolWithIndexInferMeta));
DECLARE_INFER_SHAPE_FUNCTOR(max_pool3d_with_index_grad,
MaxPool3dWithIndexGradInferShapeFunctor,
PD_INFER_META(phi::MaxPoolWithIndexGradInferMeta));
REGISTER_OPERATOR(max_pool3d_with_index, ops::MaxPoolWithIndexOp,
ops::MaxPool3dWithIndexOpMaker,
ops::MaxPoolWithIndexGradOpMaker<paddle::framework::OpDesc>,
ops::MaxPoolWithIndexGradOpMaker<paddle::imperative::OpBase>);
ops::MaxPoolWithIndexGradOpMaker<paddle::imperative::OpBase>,
MaxPool3dWithIndexInferShapeFunctor);
REGISTER_OPERATOR(max_pool3d_with_index_grad, ops::MaxPoolWithIndexOpGrad,
ops::MaxPoolWithIndexOpGradNoNeedBufferVarsInferer);
REGISTER_OP_CPU_KERNEL(
max_pool3d_with_index,
ops::MaxPoolWithIndexKernel<paddle::platform::CPUDeviceContext, float, int>,
ops::MaxPoolWithIndexKernel<paddle::platform::CPUDeviceContext, double,
int>);
REGISTER_OP_CPU_KERNEL(
max_pool3d_with_index_grad,
ops::MaxPoolWithIndexGradKernel<paddle::platform::CPUDeviceContext, float,
int>,
ops::MaxPoolWithIndexGradKernel<paddle::platform::CPUDeviceContext, double,
int>);
ops::MaxPoolWithIndexOpGradNoNeedBufferVarsInferer,
MaxPool3dWithIndexGradInferShapeFunctor);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/pool_with_index_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
max_pool2d_with_index,
ops::MaxPoolWithIndexKernel<paddle::platform::CUDADeviceContext, float,
int>,
ops::MaxPoolWithIndexKernel<paddle::platform::CUDADeviceContext, double,
int>);
REGISTER_OP_CUDA_KERNEL(
max_pool2d_with_index_grad,
ops::MaxPoolWithIndexGradKernel<paddle::platform::CUDADeviceContext, float,
int>,
ops::MaxPoolWithIndexGradKernel<paddle::platform::CUDADeviceContext, double,
int>);
REGISTER_OP_CUDA_KERNEL(
max_pool3d_with_index,
ops::MaxPoolWithIndexKernel<paddle::platform::CUDADeviceContext, float,
int>,
ops::MaxPoolWithIndexKernel<paddle::platform::CUDADeviceContext, double,
int>);
REGISTER_OP_CUDA_KERNEL(
max_pool3d_with_index_grad,
ops::MaxPoolWithIndexGradKernel<paddle::platform::CUDADeviceContext, float,
int>,
ops::MaxPoolWithIndexGradKernel<paddle::platform::CUDADeviceContext, double,
int>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/pooling.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
template <typename DeviceContext, typename T1, typename T2>
class MaxPoolWithIndexKernel : public framework::OpKernel<T1> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const Tensor* in_x = context.Input<Tensor>("X");
Tensor* out = context.Output<Tensor>("Out");
Tensor* mask = context.Output<Tensor>("Mask");
std::vector<int> ksize = context.Attr<std::vector<int>>("ksize");
std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
bool adaptive = context.Attr<bool>("adaptive");
auto& dev_ctx = context.template device_context<DeviceContext>();
if (context.Attr<bool>("global_pooling")) {
for (size_t i = 0; i < ksize.size(); ++i) {
paddings[i] = 0;
ksize[i] = static_cast<int>(in_x->dims()[i + 2]);
}
}
switch (ksize.size()) {
case 2: {
paddle::operators::math::MaxPool2dWithIndexFunctor<DeviceContext, T1,
T2>
pool2d_forward;
pool2d_forward(dev_ctx, *in_x, ksize, strides, paddings, adaptive, out,
mask);
} break;
case 3: {
paddle::operators::math::MaxPool3dWithIndexFunctor<DeviceContext, T1,
T2>
pool3d_forward;
pool3d_forward(dev_ctx, *in_x, ksize, strides, paddings, adaptive, out,
mask);
} break;
default: {
PADDLE_THROW(platform::errors::InvalidArgument(
"Pool op only supports 2D and 3D input."));
}
}
}
};
template <typename DeviceContext, typename T1, typename T2>
class MaxPoolWithIndexGradKernel : public framework::OpKernel<T1> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const Tensor* mask = context.Input<Tensor>("Mask");
const Tensor* out_grad =
context.Input<Tensor>(framework::GradVarName("Out"));
Tensor* in_x_grad = context.Output<Tensor>(framework::GradVarName("X"));
std::vector<int> ksize = context.Attr<std::vector<int>>("ksize");
std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
bool adaptive = context.Attr<bool>("adaptive");
if (context.Attr<bool>("global_pooling")) {
for (size_t i = 0; i < ksize.size(); ++i) {
paddings[i] = 0;
ksize[i] = static_cast<int>(in_x_grad->dims()[i + 2]);
}
}
if (in_x_grad) {
in_x_grad->mutable_data<T1>(context.GetPlace());
auto& device_ctx = context.template device_context<DeviceContext>();
phi::funcs::set_constant(device_ctx, in_x_grad, 0);
switch (ksize.size()) {
case 2: {
paddle::operators::math::MaxPool2dWithIndexGradFunctor<DeviceContext,
T1, T2>
pool2d_backward;
pool2d_backward(device_ctx, *out_grad, *mask, ksize, strides,
paddings, adaptive, in_x_grad);
} break;
case 3: {
paddle::operators::math::MaxPool3dWithIndexGradFunctor<DeviceContext,
T1, T2>
pool3d_backward;
pool3d_backward(device_ctx, *out_grad, *mask, ksize, strides,
paddings, adaptive, in_x_grad);
} break;
default: {
PADDLE_THROW(platform::errors::InvalidArgument(
"Pool op only supports 2D and 3D input."));
}
}
}
}
};
} // namespace operators
} // namespace paddle
......@@ -16,9 +16,10 @@ limitations under the License. */
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/pooling.h"
#include "paddle/fluid/framework/phi_utils.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/pooling.h"
namespace paddle {
namespace operators {
......@@ -53,14 +54,20 @@ class SppKernel : public framework::OpKernel<T> {
out_level.mutable_data<T>(output_shape, context.GetPlace());
// pooling
if (pooling_type == "max") {
math::Pool2dFunctor<DeviceContext, math::MaxPool<T>, T> pool_forward;
math::MaxPool<T> max_process;
phi::funcs::Pool2dFunctor<
typename framework::ConvertToPhiContext<DeviceContext>::TYPE,
phi::funcs::MaxPool<T>, T>
pool_forward;
phi::funcs::MaxPool<T> max_process;
pool_forward(context.template device_context<DeviceContext>(), *in_x,
kernel_size, strides, paddings, true, false, &out_level,
max_process);
} else if (pooling_type == "avg") {
math::Pool2dFunctor<DeviceContext, math::AvgPool<T>, T> pool_forward;
math::AvgPool<T> avg_process;
phi::funcs::Pool2dFunctor<
typename framework::ConvertToPhiContext<DeviceContext>::TYPE,
phi::funcs::AvgPool<T>, T>
pool_forward;
phi::funcs::AvgPool<T> avg_process;
pool_forward(context.template device_context<DeviceContext>(), *in_x,
kernel_size, strides, paddings, true, false, &out_level,
avg_process);
......@@ -95,7 +102,9 @@ class SppGradKernel : public framework::OpKernel<T> {
std::string pooling_type =
context.template Attr<std::string>("pooling_type");
auto& device_ctx = context.template device_context<DeviceContext>();
phi::funcs::SetConstant<DeviceContext, T> zero;
phi::funcs::SetConstant<
typename framework::ConvertToPhiContext<DeviceContext>::TYPE, T>
zero;
in_x_grad->mutable_data<T>(context.GetPlace());
zero(device_ctx, in_x_grad, static_cast<T>(0));
auto out_stride = phi::stride(out->dims());
......@@ -145,14 +154,18 @@ class SppGradKernel : public framework::OpKernel<T> {
outgrad_level.Resize(out_shape);
// pooling backward
if (pooling_type == "max") {
math::MaxPool2dGradFunctor<DeviceContext, T> pool2d_backward;
phi::funcs::MaxPool2dGradFunctor<
typename framework::ConvertToPhiContext<DeviceContext>::TYPE, T>
pool2d_backward;
pool2d_backward(context.template device_context<DeviceContext>(), *in_x,
*&out_level, *&outgrad_level, kernel_size, strides,
paddings, in_x_grad);
} else if (pooling_type == "avg") {
math::Pool2dGradFunctor<DeviceContext, math::AvgPoolGrad<T>, T>
phi::funcs::Pool2dGradFunctor<
typename framework::ConvertToPhiContext<DeviceContext>::TYPE,
phi::funcs::AvgPoolGrad<T>, T>
pool_backward;
math::AvgPoolGrad<T> avg_process;
phi::funcs::AvgPoolGrad<T> avg_process;
pool_backward(context.template device_context<DeviceContext>(), *in_x,
*&out_level, *&outgrad_level, kernel_size, strides,
paddings, true, false, in_x_grad, avg_process);
......
......@@ -17,7 +17,6 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/pooling.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/math_function.h"
......
......@@ -16,7 +16,6 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/pooling.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
......
......@@ -26,11 +26,13 @@ namespace phi {
// TODO(chenweihang): add other flags if needed
struct MetaConfig {
bool is_runtime{true};
bool is_run_mkldnn_kernel{false};
MetaConfig() = default;
// supporting implicit construction makes this easier to use
MetaConfig(bool is_runtime) : is_runtime(is_runtime) {} // NOLINT
MetaConfig(bool is_runtime, bool is_run_mkldnn_kernel)
: is_runtime(is_runtime),
is_run_mkldnn_kernel(is_run_mkldnn_kernel) {} // NOLINT
};
class MetaTensor {
......
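The struct above is why BuildInferMetaContext in the first hunk now passes {ctx->IsRuntime(), ctx->IsRunMKLDNNKernel()}: MetaConfig gained a second flag and a matching two-argument constructor. A minimal local replica of the pattern (hypothetical names; not the actual phi header):

#include <cassert>

// Local replica of the MetaConfig pattern shown above (illustrative).
struct MetaConfigSketch {
  bool is_runtime{true};
  bool is_run_mkldnn_kernel{false};
  MetaConfigSketch() = default;
  MetaConfigSketch(bool is_runtime, bool is_run_mkldnn_kernel)
      : is_runtime(is_runtime), is_run_mkldnn_kernel(is_run_mkldnn_kernel) {}
};

int main() {
  // Mirrors phi::InferMetaContext({ctx->IsRuntime(), ctx->IsRunMKLDNNKernel()}).
  MetaConfigSketch config{/*is_runtime=*/true, /*is_run_mkldnn_kernel=*/false};
  assert(config.is_runtime && !config.is_run_mkldnn_kernel);
  return 0;
}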
......@@ -122,6 +122,35 @@ void GumbelSoftmaxGradInferMeta(const MetaTensor& out,
dx->share_meta(dout);
}
void MaxPoolWithIndexGradInferMeta(const MetaTensor& x,
const MetaTensor& mask,
const MetaTensor& dout,
const std::vector<int>& kernel_size,
const std::vector<int>& strides,
const std::vector<int>& paddings,
bool global_pooling,
bool adaptive,
MetaTensor* dx) {
dx->share_meta(x);
}
void PoolGradInferMeta(const MetaTensor& x,
const MetaTensor& out,
const MetaTensor& dout,
const std::vector<int>& kernel_size,
const std::vector<int>& strides,
const std::vector<int>& paddings,
bool ceil_mode,
bool exclusive,
const std::string& data_format,
const std::string& pooling_type,
bool global_pooling,
bool adaptive,
const std::string& padding_algorithm,
MetaTensor* dx) {
dx->share_meta(x);
}
void PsroiPoolGradInferMeta(const MetaTensor& x,
const MetaTensor& rois,
paddle::optional<const MetaTensor&> rois_num,
......
......@@ -54,6 +54,16 @@ void GumbelSoftmaxGradInferMeta(const MetaTensor& out,
int axis,
MetaTensor* dx);
void MaxPoolWithIndexGradInferMeta(const MetaTensor& x,
const MetaTensor& mask,
const MetaTensor& dout,
const std::vector<int>& kernel_size,
const std::vector<int>& strides,
const std::vector<int>& paddings,
bool global_pooling,
bool adaptive,
MetaTensor* dx);
void PsroiPoolGradInferMeta(const MetaTensor& x,
const MetaTensor& rois,
paddle::optional<const MetaTensor&> rois_num,
......@@ -64,6 +74,21 @@ void PsroiPoolGradInferMeta(const MetaTensor& x,
float spatial_scale,
MetaTensor* dx);
void PoolGradInferMeta(const MetaTensor& x,
const MetaTensor& out,
const MetaTensor& dout,
const std::vector<int>& kernel_size,
const std::vector<int>& strides,
const std::vector<int>& paddings,
bool ceil_mode,
bool exclusive,
const std::string& data_format,
const std::string& pooling_type,
bool global_pooling,
bool adaptive,
const std::string& padding_algorithm,
MetaTensor* dx);
void ScatterGradInferMeta(const MetaTensor& index,
const MetaTensor& updates,
const MetaTensor& out_grad,
......
......@@ -22,6 +22,7 @@ limitations under the License. */
#include "paddle/phi/common/type_traits.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/kernels/funcs/pooling.h"
#include "paddle/phi/kernels/funcs/unfold_functor.h"
namespace phi {
......@@ -553,6 +554,78 @@ void IsfiniteInferMeta(const MetaTensor& x, MetaTensor* out) {
out->set_dtype(DataType::BOOL);
}
void MaxPoolWithIndexInferMeta(const MetaTensor& x,
const std::vector<int>& kernel_size,
const std::vector<int>& strides,
const std::vector<int>& paddings,
bool global_pooling,
bool adaptive,
MetaTensor* out,
MetaTensor* mask,
MetaConfig config) {
std::vector<int> paddings_ = paddings;
std::vector<int> kernel_size_ = kernel_size;
auto x_dims = x.dims();
PADDLE_ENFORCE(
x_dims.size() == 4 || x_dims.size() == 5,
errors::InvalidArgument(
"Pooling intput should be 4-D or 5-D tensor but received %dD-Tensor",
x_dims.size()));
if (global_pooling) {
kernel_size_.resize(static_cast<size_t>(x_dims.size()) - 2);
for (size_t i = 0; i < kernel_size_.size(); ++i) {
paddings_[i] = 0;
kernel_size_[i] = static_cast<int>(x_dims[i + 2]);
}
}
PADDLE_ENFORCE_EQ(
x_dims.size() - kernel_size_.size(),
2U,
errors::InvalidArgument(
"The input size %d minus the kernel size %d should equal to 2.",
x_dims.size(),
kernel_size_.size()));
PADDLE_ENFORCE_EQ(
kernel_size_.size(),
strides.size(),
errors::InvalidArgument(
"Strides size %d and pooling size %d should be the same.",
strides.size(),
kernel_size_.size()));
PADDLE_ENFORCE_EQ(
kernel_size_.size(),
paddings_.size(),
errors::InvalidArgument(
"Paddings size %d and pooling size %d should be the same.",
paddings_.size(),
kernel_size_.size()));
std::vector<int64_t> output_shape({x_dims[0], x_dims[1]});
if (adaptive) {
output_shape.insert(
output_shape.end(), kernel_size_.begin(), kernel_size_.end());
} else {
for (size_t i = 0; i < kernel_size_.size(); ++i) {
if ((!config.is_runtime) && (x_dims[i + 2] < 0)) {
output_shape.push_back(x_dims[i + 2]);
} else {
output_shape.push_back(funcs::MaxPoolOutputSize(
x_dims[i + 2], kernel_size_[i], paddings_[i], strides[i]));
}
}
}
out->set_dims(make_ddim(output_shape));
out->set_dtype(x.dtype());
mask->set_dims(make_ddim(output_shape));
mask->set_dtype(paddle::experimental::CppTypeToDataType<int>::Type());
}
void MultinomialInferMeta(const MetaTensor& x,
int num_samples,
bool replacement,
......@@ -675,6 +748,118 @@ void PixelShuffleInferMeta(const MetaTensor& x,
out->set_dims(output_dims);
}
void PoolInferMeta(const MetaTensor& x,
const std::vector<int>& kernel_size,
const std::vector<int>& strides,
const std::vector<int>& paddings,
bool ceil_mode,
bool exclusive,
const std::string& data_format,
const std::string& pooling_type,
bool global_pooling,
bool adaptive,
const std::string& padding_algorithm,
MetaTensor* out,
MetaConfig config) {
std::vector<int> paddings_ = paddings;
std::vector<int> kernel_size_ = kernel_size;
auto x_dims = x.dims();
PADDLE_ENFORCE_EQ(
x_dims.size() == 4 || x_dims.size() == 5,
true,
errors::InvalidArgument(
"the input of Op(pool) should be 4-D or 5-D Tensor. But "
"received: %u-D Tensor and it's shape is [%s].",
x_dims.size(),
x_dims));
PADDLE_ENFORCE_EQ(x_dims.size() - kernel_size_.size(),
2U,
errors::InvalidArgument(
"the dimension of input minus the size of "
"Attr(kernel_size_) must be euqal to 2 in Op(pool). "
"But received: the dimension of input minus the size "
"of Attr(kernel_size_) is %d, the "
"input's dimension is %d, the shape of input "
"is [%s], the Attr(kernel_size_)'s size is %d, the "
"Attr(kernel_size_) is [%s].",
x_dims.size() - kernel_size_.size(),
x_dims.size(),
x_dims,
kernel_size_.size(),
make_ddim(kernel_size_)));
PADDLE_ENFORCE_EQ(
kernel_size_.size(),
strides.size(),
errors::InvalidArgument(
"the size of Attr(kernel_size_) and Attr(strides) in "
"Op(pool) must be equal. "
"But received: Attr(kernel_size_)'s size is %d, Attr(strides)'s "
"size is %d, Attr(kernel_size_) is [%s], Attr(strides)is [%s].",
kernel_size_.size(),
strides.size(),
make_ddim(kernel_size_),
make_ddim(strides)));
  // MKL-DNN kernels describe dims in NCHW order, so we ignore the
  // data_format attribute when an MKL-DNN kernel is selected
const bool channel_last = (config.is_run_mkldnn_kernel == false) &&
(data_format == "NHWC" || data_format == "NDHWC");
// update paddings if "SAME" or global_pooling
DDim data_dims;
if (channel_last) {
data_dims = slice_ddim(x_dims, 1, x_dims.size() - 1);
} else {
data_dims = slice_ddim(x_dims, 2, x_dims.size());
}
funcs::UpdatePadding(&paddings_,
global_pooling,
adaptive,
padding_algorithm,
data_dims,
strides,
kernel_size_);
if (global_pooling) {
funcs::UpdateKernelSize(&kernel_size_, data_dims);
}
std::vector<int64_t> output_shape;
if (adaptive) {
output_shape.insert(
output_shape.end(), kernel_size_.begin(), kernel_size_.end());
} else {
for (int i = 0; i < data_dims.size(); ++i) {
if ((!config.is_runtime) && (data_dims[i] < 0)) {
output_shape.push_back(data_dims[i]);
} else {
output_shape.push_back(funcs::PoolOutputSize(data_dims[i],
kernel_size_[i],
paddings_[2 * i],
paddings_[2 * i + 1],
strides[i],
ceil_mode));
}
}
}
// output_N = input_N
output_shape.insert(output_shape.begin(), x_dims[0]);
// output_C = input_C
if (channel_last) {
output_shape.push_back(x_dims[x_dims.size() - 1]);
} else {
output_shape.insert(output_shape.begin() + 1, x_dims[1]);
}
out->set_dims(make_ddim(output_shape));
out->share_lod(x);
out->set_dtype(x.dtype());
}
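For reference, a minimal sketch of the per-dimension arithmetic assumed above for funcs::PoolOutputSize, together with the "SAME" branch that funcs::UpdatePadding presumably applies (illustrative only; the real helpers live in paddle/phi/kernels/funcs/pooling.h):

// Sketch of the pool output-size rule with two-sided padding and ceil_mode.
int PoolOutputSizeSketch(int input_size, int kernel_size, int padding_1,
                         int padding_2, int stride, bool ceil_mode) {
  int numerator = input_size - kernel_size + padding_1 + padding_2;
  // ceil_mode rounds the division up, so a final partially covered window
  // still yields an output element.
  if (ceil_mode) numerator += stride - 1;
  return numerator / stride + 1;
}

// For padding_algorithm == "SAME", UpdatePadding presumably computes a
// TensorFlow-style split of the total padding per dimension:
//   out       = ceil(in / stride)
//   pad_sum   = max((out - 1) * stride + kernel - in, 0)
//   padding_1 = pad_sum / 2,  padding_2 = pad_sum - padding_1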
void RealAndImagInferMeta(const MetaTensor& x, MetaTensor* out) {
out->set_dims(x.dims());
out->set_dtype(dtype::ToReal(x.dtype()));
......
......@@ -98,6 +98,16 @@ void IsEmptyInferMeta(const MetaTensor& x, MetaTensor* out);
void IsfiniteInferMeta(const MetaTensor& input, MetaTensor* out);
void MaxPoolWithIndexInferMeta(const MetaTensor& x,
const std::vector<int>& kernel_size,
const std::vector<int>& strides,
const std::vector<int>& paddings,
bool global_pooling,
bool adaptive,
MetaTensor* out,
MetaTensor* mask,
MetaConfig config = MetaConfig());
void MultinomialInferMeta(const MetaTensor& x,
int num_samples,
bool replacement,
......@@ -114,6 +124,20 @@ void PixelShuffleInferMeta(const MetaTensor& x,
const std::string& data_format,
MetaTensor* out);
void PoolInferMeta(const MetaTensor& x,
const std::vector<int>& kernel_size,
const std::vector<int>& strides,
const std::vector<int>& paddings,
bool ceil_mode,
bool exclusive,
const std::string& data_format,
const std::string& pooling_type,
bool global_pooling,
bool adaptive,
const std::string& padding_algorithm,
MetaTensor* out,
MetaConfig config = MetaConfig());
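As with the Concat and Split call sites further down in this diff, these InferMeta entry points are driven through MetaTensor wrappers; a hedged usage sketch for PoolInferMeta (the input tensor name and all argument values are illustrative, not taken from this PR):

// Hedged usage sketch: compile-time shape inference for a 2-D max pool.
phi::DenseTensor dense_out;
phi::MetaTensor meta_x(&x_tensor);  // x_tensor: a hypothetical NCHW input
phi::MetaTensor meta_out(&dense_out);
phi::PoolInferMeta(meta_x,
                   /*kernel_size=*/{2, 2},
                   /*strides=*/{2, 2},
                   /*paddings=*/{0, 0},
                   /*ceil_mode=*/false,
                   /*exclusive=*/true,
                   /*data_format=*/"NCHW",
                   /*pooling_type=*/"max",
                   /*global_pooling=*/false,
                   /*adaptive=*/false,
                   /*padding_algorithm=*/"EXPLICIT",
                   &meta_out);  // fills the dims, lod and dtype of dense_out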
void RealAndImagInferMeta(const MetaTensor& x, MetaTensor* out);
void ReduceInferMeta(const MetaTensor& x,
......
......@@ -11,7 +11,7 @@ set_property(GLOBAL PROPERTY PHI_KERNELS "")
# [ 1. Common kernel compilation dependencies ]
set(COMMON_KERNEL_DEPS dense_tensor sparse_coo_tensor sparse_csr_tensor kernel_context kernel_factory arg_map_context convert_utils lod_utils custom_kernel)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function blas math_function im2col vol2col concat_and_split_functor softmax)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function blas math_function im2col vol2col concat_and_split_functor)
# remove this dep after removing fluid deps on tensor creation
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} phi_api_utils)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta)
......@@ -27,22 +27,25 @@ kernel_library(full_kernel DEPS ${COMMON_KERNEL_DEPS} empty_kernel)
# Some kernels depend on some targets that are not commonly used.
# These targets are not suitable for common dependencies.
# In this case, you need to manually generate them here.
set(MANUAL_BUILD_KERNELS math_kernel softmax_kernel softmax_grad_kernel triangular_solve_grad_kernel maxout_kernel maxout_grad_kernel put_along_axis_kernel put_along_axis_grad_kernel take_along_axis_kernel take_along_axis_grad_kernel eigh_kernel segment_pool_kernel segment_pool_grad_kernel matrix_power_kernel matrix_power_grad_kernel)
set(MANUAL_BUILD_KERNELS eigh_kernel gumbel_softmax_kernel gumbel_softmax_grad_kernel math_kernel matrix_power_kernel matrix_power_grad_kernel maxout_kernel maxout_grad_kernel pool_kernel put_along_axis_kernel put_along_axis_grad_kernel segment_pool_kernel segment_pool_grad_kernel softmax_kernel softmax_grad_kernel take_along_axis_kernel take_along_axis_grad_kernel triangular_solve_grad_kernel)
kernel_library(eigh_kernel DEPS ${COMMON_KERNEL_DEPS} lapack_function)
kernel_library(gumbel_softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
kernel_library(gumbel_softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
kernel_library(math_kernel DEPS ${COMMON_KERNEL_DEPS} cast_kernel copy_kernel)
kernel_library(softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
kernel_library(softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
kernel_library(triangular_solve_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_reduce)
kernel_library(matrix_power_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse)
kernel_library(matrix_power_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse)
kernel_library(maxout_kernel DEPS ${COMMON_KERNEL_DEPS} maxouting)
kernel_library(maxout_grad_kernel DEPS ${COMMON_KERNEL_DEPS} maxouting)
kernel_library(pool_kernel DEPS ${COMMON_KERNEL_DEPS} pooling)
kernel_library(put_along_axis_kernel DEPS ${COMMON_KERNEL_DEPS} gather_scatter_kernel)
kernel_library(put_along_axis_grad_kernel DEPS ${COMMON_KERNEL_DEPS} gather_scatter_kernel)
kernel_library(take_along_axis_kernel DEPS ${COMMON_KERNEL_DEPS} gather_scatter_kernel)
kernel_library(take_along_axis_grad_kernel DEPS ${COMMON_KERNEL_DEPS} gather_scatter_kernel)
kernel_library(matrix_power_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse)
kernel_library(matrix_power_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse)
kernel_library(eigh_kernel DEPS ${COMMON_KERNEL_DEPS} lapack_function)
kernel_library(segment_pool_kernel DEPS ${COMMON_KERNEL_DEPS} segment_pooling)
kernel_library(segment_pool_grad_kernel DEPS ${COMMON_KERNEL_DEPS} segment_pooling)
kernel_library(softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
kernel_library(softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
kernel_library(take_along_axis_kernel DEPS ${COMMON_KERNEL_DEPS} gather_scatter_kernel)
kernel_library(take_along_axis_grad_kernel DEPS ${COMMON_KERNEL_DEPS} gather_scatter_kernel)
kernel_library(triangular_solve_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_reduce)
# 4. auto parse and build kernel targets by cmake
register_kernels(EXCLUDES ${COMMON_BAISC_KERNELS} ${MANUAL_BUILD_KERNELS} DEPS ${COMMON_KERNEL_DEPS} ${COMMON_BAISC_KERNELS} )
......
......@@ -40,7 +40,7 @@ DenseTensor Concat(const Context& dev_ctx,
DenseTensor dense_out;
MetaTensor meta_out(&dense_out);
ConcatInferMeta(meta_x_ptr, axis.to<int>(), &meta_out, /*is_runtime=*/true);
ConcatInferMeta(meta_x_ptr, axis.to<int>(), &meta_out);
ConcatKernel<T, Context>(dev_ctx, x, axis, &dense_out);
return dense_out;
}
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/pool_grad_kernel.h"
#include "paddle/phi/kernels/impl/pool_grad_kernel_impl.h"
#include "paddle/phi/core/kernel_registry.h"
PD_REGISTER_KERNEL(
pool2d_grad, CPU, ALL_LAYOUT, phi::Pool2dGradKernel, float, double) {}
PD_REGISTER_KERNEL(pool2d_double_grad,
CPU,
ALL_LAYOUT,
phi::Pool2dDoubleGradKernel,
float,
double) {}
PD_REGISTER_KERNEL(max_pool2d_with_index_grad,
CPU,
ALL_LAYOUT,
phi::MaxPool2dWithIndexGradKernel,
float,
double) {
kernel->InputAt(1).SetDataType(
paddle::experimental::CppTypeToDataType<int>::Type());
}
PD_REGISTER_KERNEL(
pool3d_grad, CPU, ALL_LAYOUT, phi::Pool3dGradKernel, float, double) {}
PD_REGISTER_KERNEL(max_pool3d_with_index_grad,
CPU,
ALL_LAYOUT,
phi::MaxPool3dWithIndexGradKernel,
float,
double) {
kernel->InputAt(1).SetDataType(
paddle::experimental::CppTypeToDataType<int>::Type());
}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/pool_kernel.h"
#include "paddle/phi/kernels/impl/pool_kernel_impl.h"
#include "paddle/phi/core/kernel_registry.h"
PD_REGISTER_KERNEL(pool2d, CPU, ALL_LAYOUT, phi::Pool2dKernel, float, double) {}
PD_REGISTER_KERNEL(max_pool2d_with_index,
CPU,
ALL_LAYOUT,
phi::MaxPool2dWithIndexKernel,
float,
double) {
kernel->OutputAt(1).SetDataType(
paddle::experimental::CppTypeToDataType<int>::Type());
}
PD_REGISTER_KERNEL(pool3d, CPU, ALL_LAYOUT, phi::Pool3dKernel, float, double) {}
PD_REGISTER_KERNEL(max_pool3d_with_index,
CPU,
ALL_LAYOUT,
phi::MaxPool3dWithIndexKernel,
float,
double) {
kernel->OutputAt(1).SetDataType(
paddle::experimental::CppTypeToDataType<int>::Type());
}
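The PD_REGISTER_KERNEL blocks above bind these kernels to string names in phi's kernel registry; the max_pool*_with_index variants additionally pin their second output (the mask) to int32. A hedged sketch of how a registered kernel might be resolved afterwards (the lookup call is an assumption based on phi::KernelFactory; illustrative only):

// Hedged sketch: resolving the CPU float32 "pool2d" kernel registered above.
#include "paddle/phi/core/kernel_factory.h"

void LookupPool2dKernelSketch() {
  phi::KernelKey key(phi::Backend::CPU,
                     phi::DataLayout::ALL_LAYOUT,
                     phi::DataType::FLOAT32);
  // "pool2d" is the name bound by PD_REGISTER_KERNEL above; SelectKernel is
  // assumed to return the matching phi::Kernel entry for this key.
  phi::Kernel kernel = phi::KernelFactory::Instance().SelectKernel("pool2d", key);
  (void)kernel;
}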
......@@ -38,7 +38,7 @@ void SplitKernel(const Context& dev_ctx,
out_metas_ptr.push_back(&out_metas.back());
}
phi::SplitInferMeta(x, num_or_sections, axis_scalar, out_metas_ptr, true);
phi::SplitInferMeta(x, num_or_sections, axis_scalar, out_metas_ptr);
for (size_t i = 0; i < out_metas.size(); ++i) {
outs[i]->Resize(out_metas[i].dims());
......
......@@ -3,11 +3,12 @@ add_subdirectory(blas)
add_subdirectory(lapack)
add_subdirectory(detail)
math_library(math_function DEPS blas dense_tensor tensor)
math_library(segment_pooling)
math_library(sequence2batch)
math_library(concat_and_split_functor DEPS dense_tensor)
math_library(gru_compute DEPS activation_functions math_function)
math_library(lstm_compute DEPS activation_functions)
math_library(concat_and_split_functor DEPS dense_tensor)
math_library(math_function DEPS blas dense_tensor tensor)
math_library(matrix_reduce DEPS dense_tensor)
math_library(matrix_inverse DEPS dense_tensor eigen3 blas)
math_library(pooling DEPS dense_tensor)
math_library(segment_pooling)
math_library(sequence2batch)
This diff is collapsed.
This diff is collapsed.
......@@ -37,7 +37,7 @@ void SplitKernel(const Context& dev_ctx,
out_metas_ptr.push_back(&out_metas.back());
}
phi::SplitInferMeta(x, num_or_sections, axis_scalar, out_metas_ptr, true);
phi::SplitInferMeta(x, num_or_sections, axis_scalar, out_metas_ptr);
for (size_t i = 0; i < out_metas.size(); ++i) {
outs[i]->Resize(out_metas[i].dims());
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.