未验证 提交 3f64a2c3 编写于 作者: Z zyfncg 提交者: GitHub

Polish slice code in fluid (#45746)

* support selected_rows kernel for multiply in dygraph

* delete useless code of slice in fluid

* fix compile bug

* move slice_array from fluid to phi

* fix strided_slice_op_npu
上级 5b1dd387
...@@ -12,13 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,13 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/slice_op.h"
#include <algorithm> #include <algorithm>
#include <memory> #include <memory>
#include <string> #include <string>
#include <vector> #include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/kernels/funcs/slice_utils.h" #include "paddle/phi/kernels/funcs/slice_utils.h"
namespace paddle { namespace paddle {
...@@ -456,53 +455,3 @@ REGISTER_OPERATOR(slice_grad, ...@@ -456,53 +455,3 @@ REGISTER_OPERATOR(slice_grad,
ops::SliceDoubleOpGradMaker<paddle::imperative::OpBase>, ops::SliceDoubleOpGradMaker<paddle::imperative::OpBase>,
ops::SliceOpGradNoNeedBufferVarsInferer, ops::SliceOpGradNoNeedBufferVarsInferer,
ops::SliceOpGradVarTypeInference); ops::SliceOpGradVarTypeInference);
// Legacy fluid registration of the CPU kernels for the `slice` op: one
// ops::SliceKernel<phi::CPUContext, T> instantiation per listed element type.
// Note that float16 is not part of the CPU list.
REGISTER_OP_CPU_KERNEL(
slice,
ops::SliceKernel<phi::CPUContext, bool>,
ops::SliceKernel<phi::CPUContext, uint8_t>,
ops::SliceKernel<phi::CPUContext, int>,
ops::SliceKernel<phi::CPUContext, int64_t>,
ops::SliceKernel<phi::CPUContext, float>,
ops::SliceKernel<phi::CPUContext, double>,
ops::SliceKernel<phi::CPUContext, paddle::platform::complex<float>>,
ops::SliceKernel<phi::CPUContext, paddle::platform::complex<double>>,
ops::SliceKernel<phi::CPUContext, paddle::platform::bfloat16>);
// Legacy fluid registration of the CPU kernels for the `slice_grad` op, using
// ops::SliceGradKernel with the same element-type list as the forward op.
REGISTER_OP_CPU_KERNEL(
slice_grad,
ops::SliceGradKernel<phi::CPUContext, bool>,
ops::SliceGradKernel<phi::CPUContext, uint8_t>,
ops::SliceGradKernel<phi::CPUContext, int>,
ops::SliceGradKernel<phi::CPUContext, int64_t>,
ops::SliceGradKernel<phi::CPUContext, float>,
ops::SliceGradKernel<phi::CPUContext, double>,
ops::SliceGradKernel<phi::CPUContext, paddle::platform::complex<float>>,
ops::SliceGradKernel<phi::CPUContext, paddle::platform::complex<double>>,
ops::SliceGradKernel<phi::CPUContext, paddle::platform::bfloat16>);
// Legacy fluid registration of the CUDA kernels for the `slice` op: one
// ops::SliceKernel<phi::GPUContext, T> instantiation per listed element type.
// Unlike the CPU list, this one also contains paddle::platform::float16.
REGISTER_OP_CUDA_KERNEL(
slice,
ops::SliceKernel<phi::GPUContext, bool>,
ops::SliceKernel<phi::GPUContext, uint8_t>,
ops::SliceKernel<phi::GPUContext, float>,
ops::SliceKernel<phi::GPUContext, double>,
ops::SliceKernel<phi::GPUContext, int>,
ops::SliceKernel<phi::GPUContext, int64_t>,
ops::SliceKernel<phi::GPUContext, paddle::platform::float16>,
ops::SliceKernel<phi::GPUContext, paddle::platform::bfloat16>,
ops::SliceKernel<phi::GPUContext, paddle::platform::complex<float>>,
ops::SliceKernel<phi::GPUContext, paddle::platform::complex<double>>);
// Legacy fluid registration of the CUDA kernels for the `slice_grad` op, using
// ops::SliceGradKernel with the same element-type list as the CUDA forward op.
REGISTER_OP_CUDA_KERNEL(
slice_grad,
ops::SliceGradKernel<phi::GPUContext, bool>,
ops::SliceGradKernel<phi::GPUContext, uint8_t>,
ops::SliceGradKernel<phi::GPUContext, float>,
ops::SliceGradKernel<phi::GPUContext, double>,
ops::SliceGradKernel<phi::GPUContext, int>,
ops::SliceGradKernel<phi::GPUContext, int64_t>,
ops::SliceGradKernel<phi::GPUContext, paddle::platform::float16>,
ops::SliceGradKernel<phi::GPUContext, paddle::platform::bfloat16>,
ops::SliceGradKernel<phi::GPUContext, paddle::platform::complex<float>>,
ops::SliceGradKernel<phi::GPUContext, paddle::platform::complex<double>>);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
using Variable = framework::Variable;
using LoDTensorArray = framework::LoDTensorArray;
using DDim = framework::DDim;
// Slices the LoDTensorArray bound to input "Input" and writes the result to
// output "Out".
//
// Only starts[0] and ends[0] are used, because a tensor array is treated as a
// rank-1 container. Negative values are offset by the array size, then start
// is clamped to >= 0 and end is clamped to [0, size].
//
// When `out_is_array` is true the elements in [start, end) are copied into the
// output LoDTensorArray; otherwise the single element at `start` is copied
// into the output dense tensor.
//
// Raises InvalidArgument (through PADDLE_ENFORCE_GT) when the normalized end
// is not greater than the normalized start.
inline void DealTensorArray(const framework::ExecutionContext& ctx,
                            const std::vector<int64_t>& starts,
                            const std::vector<int64_t>& ends,
                            bool out_is_array) {
  auto in_array = ctx.Input<LoDTensorArray>("Input");
  // If the input is LoDTensorArray, the rank of input is 1.
  int64_t in_size = in_array->size();
  int64_t start = starts[0] < 0 ? (starts[0] + in_size) : starts[0];
  int64_t end = ends[0] < 0 ? (ends[0] + in_size) : ends[0];

  start = std::max(start, static_cast<int64_t>(0));
  end = std::max(end, static_cast<int64_t>(0));
  end = std::min(end, in_size);

  // starts == -1 combined with an end that resolves to 0 selects exactly the
  // last element.
  if (starts[0] == -1 && end == 0) {
    end = start + 1;
  }

  PADDLE_ENFORCE_GT(end,
                    start,
                    platform::errors::InvalidArgument(
                        "Attr(ends) should be greater than attr(starts) in "
                        "slice op. But received end = %d, start = %d.",
                        ends[0],
                        starts[0]));
  int64_t out_size = end - start;

  if (out_is_array) {
    auto out_array = ctx.Output<LoDTensorArray>("Out");
    out_array->resize(out_size);

    // The counter has the same width as out_size, so the comparison never
    // mixes int with int64_t.
    for (int64_t i = 0; i < out_size; ++i) {
      auto* out_tensor = &out_array->at(i);
      // Bind by const reference: the element is only read, so copying the
      // tensor object on every iteration is unnecessary.
      const auto& in_tensor = in_array->at(i + start);
      out_tensor->set_lod(in_tensor.lod());
      if (in_tensor.memory_size() > 0) {
        paddle::framework::TensorCopy(in_tensor, ctx.GetPlace(), out_tensor);
      } else {
        VLOG(10) << "WARNING: The input tensor 'x_tensor' holds no memory, so "
                    "nothing has been written to output array["
                 << i << "].";
      }
    }
  } else {
    auto out = ctx.Output<phi::DenseTensor>("Out");
    // Read-only access to the selected element; no temporary copy needed.
    const auto& in_tensor = in_array->at(start);
    paddle::framework::TensorCopy(in_tensor, ctx.GetPlace(), out);
  }
}
// Fluid kernel for the `slice` op. In this form it only handles the case where
// "Input" is a LoDTensorArray; for any other input type Compute() validates
// the attributes and returns without producing output.
template <typename DeviceContext, typename T>
class SliceKernel : public framework::OpKernel<T> {
 public:
  // Resolves starts/ends (tensor inputs take precedence over attributes),
  // checks that both have as many entries as `axes`, and forwards tensor-array
  // inputs to DealTensorArray.
  void Compute(const framework::ExecutionContext& ctx) const override {
    const Variable* input_var = ctx.InputVar("Input");
    Variable* out_var = ctx.OutputVar("Out");
    const bool input_is_array = input_var->IsType<LoDTensorArray>();
    const bool out_is_array = out_var->IsType<LoDTensorArray>();

    const auto axes_attr = ctx.Attr<std::vector<int>>("axes");
    const auto starts_attr = ctx.Attr<std::vector<int>>("starts");
    const auto ends_attr = ctx.Attr<std::vector<int>>("ends");
    const std::vector<int64_t> axes(axes_attr.begin(), axes_attr.end());

    // Read for parity with the op definition; not consumed by this kernel.
    auto decrease_axis = ctx.Attr<std::vector<int>>("decrease_axis");
    auto infer_flags = ctx.Attr<std::vector<int>>("infer_flags");

    // Step 1: pick the effective value for starts / ends. Priority is the
    // single tensor input, then the tensor list, then the attribute.
    const auto resolve =
        [&ctx](const char* tensor_name,
               const char* list_name,
               const std::vector<int>& attr_value) -> std::vector<int64_t> {
      auto tensor_list = ctx.MultiInput<phi::DenseTensor>(list_name);
      if (ctx.HasInput(tensor_name)) {
        return GetDataFromTensor<int64_t>(
            ctx.Input<phi::DenseTensor>(tensor_name));
      }
      if (tensor_list.size() > 0) {
        return GetDataFromTensorList<int64_t>(tensor_list);
      }
      return std::vector<int64_t>(attr_value.begin(), attr_value.end());
    };

    const std::vector<int64_t> starts =
        resolve("StartsTensor", "StartsTensorList", starts_attr);
    const std::vector<int64_t> ends =
        resolve("EndsTensor", "EndsTensorList", ends_attr);

    PADDLE_ENFORCE_EQ(
        starts.size(),
        axes.size(),
        platform::errors::InvalidArgument(
            "The size of starts must be equal to the size of axes."));
    PADDLE_ENFORCE_EQ(
        ends.size(),
        axes.size(),
        platform::errors::InvalidArgument(
            "The size of ends must be equal to the size of axes."));

    // Step 2: compute the output. Only tensor-array inputs are handled here.
    if (!input_is_array) {
      return;
    }
    DealTensorArray(ctx, starts, ends, out_is_array);
  }
};
// Fluid kernel for the `slice_grad` op. In this form it only handles the case
// where the gradient of "Input" is a LoDTensorArray; otherwise Compute()
// returns without writing anything.
template <typename DeviceContext, typename T>
class SliceGradKernel : public framework::OpKernel<T> {
 public:
  // Zero-initializes every entry of Input@GRAD with the dims of the matching
  // forward input entry, then copies Out@GRAD into the sliced positions
  // beginning at the normalized starts[0].
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto axes = ctx.Attr<std::vector<int>>("axes");
    const auto starts_attr = ctx.Attr<std::vector<int>>("starts");
    const auto ends_attr = ctx.Attr<std::vector<int>>("ends");

    // Effective starts / ends: the single tensor input wins over the tensor
    // list, which wins over the attribute.
    const auto resolve =
        [&ctx](const char* tensor_name,
               const char* list_name,
               const std::vector<int>& attr_value) -> std::vector<int64_t> {
      auto tensor_list = ctx.MultiInput<phi::DenseTensor>(list_name);
      if (ctx.HasInput(tensor_name)) {
        return GetDataFromTensor<int64_t>(
            ctx.Input<phi::DenseTensor>(tensor_name));
      }
      if (tensor_list.size() > 0) {
        return GetDataFromTensorList<int64_t>(tensor_list);
      }
      return std::vector<int64_t>(attr_value.begin(), attr_value.end());
    };
    const std::vector<int64_t> starts =
        resolve("StartsTensor", "StartsTensorList", starts_attr);
    const std::vector<int64_t> ends =
        resolve("EndsTensor", "EndsTensorList", ends_attr);

    Variable* d_input_var = ctx.OutputVar(framework::GradVarName("Input"));
    const Variable* d_out_var = ctx.InputVar(framework::GradVarName("Out"));
    const bool d_input_is_array = d_input_var->IsType<LoDTensorArray>();
    const bool d_out_is_array = d_out_var->IsType<LoDTensorArray>();

    if (!d_input_is_array) {
      return;
    }

    const auto* forward_inputs = ctx.Input<LoDTensorArray>("Input");
    auto* grad_inputs =
        ctx.Output<LoDTensorArray>(framework::GradVarName("Input"));
    const int64_t d_in_size = forward_inputs->size();
    grad_inputs->resize(d_in_size);

    // A tensor array has rank 1, so only the 0th entry of starts matters.
    int64_t start = starts[0];
    if (start < 0) {
      start += d_in_size;
    }
    start = std::max(start, static_cast<int64_t>(0));

    // Fill every gradient entry with zeros.
    auto& dev_ctx =
        *platform::DeviceContextPool::Instance().Get(ctx.GetPlace());
    phi::funcs::SetConstant<DeviceContext, T> zero_fill;
    for (int idx = 0; idx < d_in_size; ++idx) {
      auto& grad_slot = grad_inputs->at(idx);
      grad_slot.Resize(forward_inputs->at(idx).dims());
      grad_slot.mutable_data<T>(ctx.GetPlace());
      zero_fill(reinterpret_cast<const DeviceContext&>(dev_ctx),
                &grad_slot,
                static_cast<T>(0));
    }

    // Scatter the incoming gradient into the sliced positions.
    if (d_out_is_array) {
      auto* grad_outputs =
          ctx.Input<LoDTensorArray>(framework::GradVarName("Out"));
      const int d_out_size = grad_outputs->size();
      for (int idx = 0; idx < d_out_size; ++idx) {
        paddle::framework::TensorCopy(grad_outputs->at(idx),
                                      ctx.GetPlace(),
                                      &(grad_inputs->at(start + idx)));
      }
    } else {
      auto* grad_output =
          ctx.Input<phi::DenseTensor>(framework::GradVarName("Out"));
      paddle::framework::TensorCopy(
          *grad_output, ctx.GetPlace(), &(grad_inputs->at(start)));
    }
  }
};
} // namespace operators
} // namespace paddle
...@@ -12,8 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,8 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h" #include "paddle/fluid/operators/mlu/mlu_baseop.h"
#include "paddle/fluid/operators/slice_op.h" #include "paddle/fluid/operators/utils.h"
#include "paddle/phi/kernels/funcs/slice_utils.h" #include "paddle/phi/kernels/funcs/slice_utils.h"
namespace paddle { namespace paddle {
......
...@@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/slice_op.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h"
#include "paddle/phi/kernels/funcs/slice_utils.h" #include "paddle/phi/kernels/funcs/slice_utils.h"
......
...@@ -19,7 +19,6 @@ limitations under the License. */ ...@@ -19,7 +19,6 @@ limitations under the License. */
#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/slice_op.h"
#include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/infermeta/backward.h" #include "paddle/phi/infermeta/backward.h"
#include "paddle/phi/kernels/funcs/strided_slice.h" #include "paddle/phi/kernels/funcs/strided_slice.h"
......
...@@ -12,13 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,13 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h" #include "paddle/fluid/operators/mlu/mlu_baseop.h"
#include "paddle/fluid/operators/slice_op.h" #include "paddle/fluid/operators/utils.h"
#include "paddle/phi/kernels/funcs/strided_slice.h" #include "paddle/phi/kernels/funcs/strided_slice.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = framework::Tensor;
using Variable = framework::Variable;
using LoDTensorArray = framework::LoDTensorArray;
using DDim = framework::DDim;
static void ProcessStridedSliceParams( static void ProcessStridedSliceParams(
const std::vector<int>& axes, const std::vector<int>& axes,
const DDim& input_dims, const DDim& input_dims,
......
...@@ -12,13 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,13 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/slice_op.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h"
#include "paddle/phi/kernels/funcs/strided_slice.h" #include "paddle/phi/kernels/funcs/strided_slice.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
using Variable = framework::Variable;
using LoDTensorArray = framework::LoDTensorArray;
using DDim = framework::DDim;
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
class StridedSliceNPUKernel : public framework::OpKernel<T> { class StridedSliceNPUKernel : public framework::OpKernel<T> {
public: public:
......
...@@ -58,6 +58,8 @@ class IntArrayBase { ...@@ -58,6 +58,8 @@ class IntArrayBase {
size_t size() const { return array_.size(); } size_t size() const { return array_.size(); }
// Returns the element at position `i` of the stored array_. The index is
// passed straight to array_'s subscript operator; no range check is done here.
int64_t operator[](int64_t i) const { return array_[i]; }
const std::vector<int64_t>& GetData() const { return array_; } const std::vector<int64_t>& GetData() const { return array_; }
private: private:
......
...@@ -31,3 +31,29 @@ PD_REGISTER_KERNEL(slice_grad, ...@@ -31,3 +31,29 @@ PD_REGISTER_KERNEL(slice_grad,
phi::dtype::complex<float>, phi::dtype::complex<float>,
phi::dtype::complex<double>, phi::dtype::complex<double>,
phi::dtype::bfloat16) {} phi::dtype::bfloat16) {}
// CPU registration of phi::SliceArrayGradKernel under the name
// `slice_array_grad`, for all layouts.
// uint8_t is listed so that the CPU gradient covers the same element types as
// the CPU forward `slice_array` kernel and the GPU `slice_array_grad` kernel.
PD_REGISTER_KERNEL(slice_array_grad,
                   CPU,
                   ALL_LAYOUT,
                   phi::SliceArrayGradKernel,
                   bool,
                   int,
                   uint8_t,
                   int64_t,
                   float,
                   double,
                   phi::dtype::complex<float>,
                   phi::dtype::complex<double>,
                   phi::dtype::bfloat16) {}
// CPU registration of phi::SliceArrayDenseGradKernel under the name
// `slice_array_dense_grad`, for all layouts.
// uint8_t is listed so that the CPU gradient covers the same element types as
// the CPU forward `slice_array_dense` kernel and the GPU
// `slice_array_dense_grad` kernel.
PD_REGISTER_KERNEL(slice_array_dense_grad,
                   CPU,
                   ALL_LAYOUT,
                   phi::SliceArrayDenseGradKernel,
                   bool,
                   int,
                   uint8_t,
                   int64_t,
                   float,
                   double,
                   phi::dtype::complex<float>,
                   phi::dtype::complex<double>,
                   phi::dtype::bfloat16) {}
...@@ -31,3 +31,31 @@ PD_REGISTER_KERNEL(slice, ...@@ -31,3 +31,31 @@ PD_REGISTER_KERNEL(slice,
phi::dtype::complex<float>, phi::dtype::complex<float>,
phi::dtype::complex<double>, phi::dtype::complex<double>,
phi::dtype::bfloat16) {} phi::dtype::bfloat16) {}
// CPU registration of phi::SliceArrayKernel under the name `slice_array`,
// for all layouts and the listed element types.
PD_REGISTER_KERNEL(slice_array,
CPU,
ALL_LAYOUT,
phi::SliceArrayKernel,
bool,
int,
uint8_t,
int64_t,
float,
double,
phi::dtype::complex<float>,
phi::dtype::complex<double>,
phi::dtype::bfloat16) {}
// CPU registration of phi::SliceArrayDenseKernel under the name
// `slice_array_dense`, with the same element-type list as `slice_array`.
PD_REGISTER_KERNEL(slice_array_dense,
CPU,
ALL_LAYOUT,
phi::SliceArrayDenseKernel,
bool,
int,
uint8_t,
int64_t,
float,
double,
phi::dtype::complex<float>,
phi::dtype::complex<double>,
phi::dtype::bfloat16) {}
...@@ -32,3 +32,33 @@ PD_REGISTER_KERNEL(slice_grad, ...@@ -32,3 +32,33 @@ PD_REGISTER_KERNEL(slice_grad,
phi::dtype::complex<double>, phi::dtype::complex<double>,
phi::dtype::bfloat16, phi::dtype::bfloat16,
phi::dtype::float16) {} phi::dtype::float16) {}
// GPU registration of phi::SliceArrayGradKernel under the name
// `slice_array_grad`, for all layouts. The list includes phi::dtype::float16.
PD_REGISTER_KERNEL(slice_array_grad,
GPU,
ALL_LAYOUT,
phi::SliceArrayGradKernel,
bool,
int,
uint8_t,
int64_t,
float,
double,
phi::dtype::complex<float>,
phi::dtype::complex<double>,
phi::dtype::bfloat16,
phi::dtype::float16) {}
// GPU registration of phi::SliceArrayDenseGradKernel under the name
// `slice_array_dense_grad`, with the same element-type list as above.
PD_REGISTER_KERNEL(slice_array_dense_grad,
GPU,
ALL_LAYOUT,
phi::SliceArrayDenseGradKernel,
bool,
int,
uint8_t,
int64_t,
float,
double,
phi::dtype::complex<float>,
phi::dtype::complex<double>,
phi::dtype::bfloat16,
phi::dtype::float16) {}
...@@ -32,3 +32,33 @@ PD_REGISTER_KERNEL(slice, ...@@ -32,3 +32,33 @@ PD_REGISTER_KERNEL(slice,
phi::dtype::complex<double>, phi::dtype::complex<double>,
phi::dtype::bfloat16, phi::dtype::bfloat16,
phi::dtype::float16) {} phi::dtype::float16) {}
// GPU registration of phi::SliceArrayKernel under the name `slice_array`,
// for all layouts. The list includes phi::dtype::float16.
PD_REGISTER_KERNEL(slice_array,
GPU,
ALL_LAYOUT,
phi::SliceArrayKernel,
bool,
int,
uint8_t,
int64_t,
float,
double,
phi::dtype::complex<float>,
phi::dtype::complex<double>,
phi::dtype::bfloat16,
phi::dtype::float16) {}
// GPU registration of phi::SliceArrayDenseKernel under the name
// `slice_array_dense`, with the same element-type list as above.
PD_REGISTER_KERNEL(slice_array_dense,
GPU,
ALL_LAYOUT,
phi::SliceArrayDenseKernel,
bool,
int,
uint8_t,
int64_t,
float,
double,
phi::dtype::complex<float>,
phi::dtype::complex<double>,
phi::dtype::bfloat16,
phi::dtype::float16) {}
...@@ -14,8 +14,10 @@ ...@@ -14,8 +14,10 @@
#pragma once #pragma once
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/slice_utils.h" #include "paddle/phi/kernels/funcs/slice_utils.h"
#include "paddle/phi/kernels/slice_grad_kernel.h" #include "paddle/phi/kernels/slice_grad_kernel.h"
...@@ -350,4 +352,62 @@ void SliceGradRawKernel(const Context& ctx, ...@@ -350,4 +352,62 @@ void SliceGradRawKernel(const Context& ctx,
} }
} }
template <typename T, typename Context>
void SliceArrayGradKernel(const Context& dev_ctx,
const TensorArray& input,
const TensorArray& out_grad,
const IntArray& starts,
const IntArray& ends,
TensorArray* input_grad) {
int64_t d_in_size = input.size();
input_grad->resize(d_in_size);
// If the input is TensorArray, the rank of input is 1.
// So only use the 0th element of starts.
int64_t start = starts[0] < 0 ? (starts[0] + d_in_size) : starts[0];
start = std::max(start, static_cast<int64_t>(0));
// set zero
phi::funcs::SetConstant<Context, T> functor;
for (int i = 0; i < d_in_size; ++i) {
const auto& dim = input.at(i).dims();
auto* in_grad_tensor = &input_grad->at(i);
in_grad_tensor->Resize(dim);
dev_ctx.template Alloc<T>(in_grad_tensor);
functor(dev_ctx, in_grad_tensor, static_cast<T>(0));
}
int d_out_size = out_grad.size();
for (int i = 0; i < d_out_size; ++i) {
phi::Copy<Context>(dev_ctx,
out_grad[i],
dev_ctx.GetPlace(),
false,
&input_grad->at(start + i));
}
}
template <typename T, typename Context>
void SliceArrayDenseGradKernel(const Context& dev_ctx,
const TensorArray& input,
const DenseTensor& out_grad,
const IntArray& starts,
TensorArray* input_grad) {
int64_t d_in_size = input.size();
input_grad->resize(d_in_size);
// If the input is TensorArray, the rank of input is 1.
// So only use the 0th element of starts.
int64_t start = starts[0] < 0 ? (starts[0] + d_in_size) : starts[0];
start = std::max(start, static_cast<int64_t>(0));
// set zero
phi::funcs::SetConstant<Context, T> functor;
for (int i = 0; i < d_in_size; ++i) {
const auto& dim = input.at(i).dims();
auto* in_grad_tensor = &input_grad->at(i);
in_grad_tensor->Resize(dim);
dev_ctx.template Alloc<T>(in_grad_tensor);
functor(dev_ctx, in_grad_tensor, static_cast<T>(0));
}
phi::Copy<Context>(
dev_ctx, out_grad, dev_ctx.GetPlace(), false, &input_grad->at(start));
}
} // namespace phi } // namespace phi
...@@ -14,9 +14,13 @@ ...@@ -14,9 +14,13 @@
#pragma once #pragma once
#include <glog/logging.h>
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h"
#include "paddle/phi/kernels/funcs/slice_utils.h" #include "paddle/phi/kernels/funcs/slice_utils.h"
#include "paddle/phi/kernels/slice_kernel.h"
namespace phi { namespace phi {
...@@ -151,4 +155,59 @@ void SliceRawKernel(const Context& ctx, ...@@ -151,4 +155,59 @@ void SliceRawKernel(const Context& ctx,
} }
} }
// Slices a TensorArray into another TensorArray.
//
// Only starts[0] / ends[0] are used. Negative values are offset by the array
// size; start is clamped to >= 0 and end to [0, size]. `out` is resized to
// end - start and each selected entry is copied (with its LoD) to the place of
// dev_ctx. Entries whose memory_size() is 0 only get their LoD set.
//
// Raises InvalidArgument (through PADDLE_ENFORCE_GT) when the normalized end
// is not greater than the normalized start.
template <typename T, typename Context>
void SliceArrayKernel(const Context& dev_ctx,
                      const TensorArray& input,
                      const IntArray& starts,
                      const IntArray& ends,
                      TensorArray* out) {
  const int64_t in_size = input.size();
  const int64_t zero = 0;

  int64_t start = starts[0];
  if (start < 0) {
    start += in_size;
  }
  start = std::max(start, zero);

  int64_t end = ends[0];
  if (end < 0) {
    end += in_size;
  }
  end = std::min(std::max(end, zero), in_size);

  // starts == -1 together with an end that resolves to 0 selects exactly the
  // last element.
  if (starts[0] == -1 && end == 0) {
    end = start + 1;
  }

  PADDLE_ENFORCE_GT(end,
                    start,
                    phi::errors::InvalidArgument(
                        "Attr(ends) should be greater than attr(starts) in "
                        "slice op. But received end = %d, start = %d.",
                        ends[0],
                        starts[0]));

  const int64_t out_size = end - start;
  out->resize(out_size);

  for (int idx = 0; idx < out_size; ++idx) {
    auto* dst = &out->at(idx);
    const auto& src = input.at(idx + start);
    dst->set_lod(src.lod());

    if (!(src.memory_size() > 0)) {
      VLOG(10) << "WARNING: The input tensor 'x_tensor' holds no memory, so "
                  "nothing has been written to output array["
               << idx << "].";
      continue;
    }
    phi::Copy<Context>(dev_ctx, src, dev_ctx.GetPlace(), false, dst);
  }
}
template <typename T, typename Context>
void SliceArrayDenseKernel(const Context& dev_ctx,
const TensorArray& input,
const IntArray& starts,
DenseTensor* out) {
int64_t in_size = input.size();
int64_t start = starts[0] < 0 ? (starts[0] + in_size) : starts[0];
start = std::max(start, static_cast<int64_t>(0));
phi::Copy<Context>(dev_ctx, input[start], dev_ctx.GetPlace(), false, out);
}
} // namespace phi } // namespace phi
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "paddle/phi/common/int_array.h" #include "paddle/phi/common/int_array.h"
#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/tensor_array.h"
namespace phi { namespace phi {
...@@ -30,4 +31,19 @@ void SliceGradRawKernel(const Context& ctx, ...@@ -30,4 +31,19 @@ void SliceGradRawKernel(const Context& ctx,
const std::vector<int64_t>& decrease_axis, const std::vector<int64_t>& decrease_axis,
DenseTensor* input_grad); DenseTensor* input_grad);
// Declaration of the gradient kernel for slicing a TensorArray into a
// TensorArray.
//   dev_ctx    - device context used for allocation and copies.
//   input      - the forward input array.
//   out_grad   - gradient of the sliced output array.
//   starts     - slice start indices.
//   ends       - slice end indices.
//   input_grad - output: gradient with respect to `input`.
template <typename T, typename Context>
void SliceArrayGradKernel(const Context& dev_ctx,
const TensorArray& input,
const TensorArray& out_grad,
const IntArray& starts,
const IntArray& ends,
TensorArray* input_grad);
// Declaration of the gradient kernel for selecting a single DenseTensor from a
// TensorArray.
//   dev_ctx    - device context used for allocation and copies.
//   input      - the forward input array.
//   out_grad   - gradient of the selected dense output.
//   starts     - slice start indices.
//   input_grad - output: gradient with respect to `input`.
template <typename T, typename Context>
void SliceArrayDenseGradKernel(const Context& dev_ctx,
const TensorArray& input,
const DenseTensor& out_grad,
const IntArray& starts,
TensorArray* input_grad);
} // namespace phi } // namespace phi
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "paddle/phi/common/int_array.h" #include "paddle/phi/common/int_array.h"
#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/tensor_array.h"
#include "paddle/phi/infermeta/unary.h" #include "paddle/phi/infermeta/unary.h"
namespace phi { namespace phi {
...@@ -30,6 +31,19 @@ void SliceRawKernel(const Context& ctx, ...@@ -30,6 +31,19 @@ void SliceRawKernel(const Context& ctx,
const std::vector<int64_t>& decrease_axis, const std::vector<int64_t>& decrease_axis,
DenseTensor* out); DenseTensor* out);
// Declaration of the kernel that slices a TensorArray into another
// TensorArray.
//   dev_ctx - device context used for copies.
//   input   - the array to slice.
//   starts  - slice start indices.
//   ends    - slice end indices.
//   out     - output: the sliced array.
template <typename T, typename Context>
void SliceArrayKernel(const Context& dev_ctx,
const TensorArray& input,
const IntArray& starts,
const IntArray& ends,
TensorArray* out);
// Declaration of the kernel that selects a single entry of a TensorArray and
// writes it to a DenseTensor.
//   dev_ctx - device context used for the copy.
//   input   - the array to select from.
//   starts  - slice start indices.
//   out     - output: the selected tensor.
template <typename T, typename Context>
void SliceArrayDenseKernel(const Context& dev_ctx,
const TensorArray& input,
const IntArray& starts,
DenseTensor* out);
template <typename T, typename Context> template <typename T, typename Context>
DenseTensor SliceKernel(const Context& ctx, DenseTensor SliceKernel(const Context& ctx,
const DenseTensor& input, const DenseTensor& input,
......
...@@ -19,7 +19,27 @@ namespace phi { ...@@ -19,7 +19,27 @@ namespace phi {
KernelSignature SliceOpArgumentMapping(const ArgumentMappingContext& ctx) { KernelSignature SliceOpArgumentMapping(const ArgumentMappingContext& ctx) {
// if input is Tensor Array // if input is Tensor Array
if (ctx.IsDenseTensorVectorInput("Input")) { if (ctx.IsDenseTensorVectorInput("Input")) {
return KernelSignature("unregistered", {}, {}, {}); const char* starts_name = "starts";
if (ctx.HasInput("StartsTensor")) {
starts_name = "StartsTensor";
} else if (ctx.InputSize("StartsTensorList") > 0) {
starts_name = "StartsTensorList";
}
const char* ends_name = "ends";
if (ctx.HasInput("EndsTensor")) {
ends_name = "EndsTensor";
} else if (ctx.InputSize("EndsTensorList") > 0) {
ends_name = "EndsTensorList";
}
if (paddle::any_cast<std::vector<int>>(ctx.Attr("decrease_axis")).size() >
0) {
return KernelSignature(
"slice_array_dense", {"Input"}, {starts_name}, {"Out"});
} else {
return KernelSignature(
"slice_array", {"Input"}, {starts_name, ends_name}, {"Out"});
}
} }
if (ctx.HasInput("StartsTensor")) { if (ctx.HasInput("StartsTensor")) {
...@@ -99,7 +119,30 @@ KernelSignature SliceOpArgumentMapping(const ArgumentMappingContext& ctx) { ...@@ -99,7 +119,30 @@ KernelSignature SliceOpArgumentMapping(const ArgumentMappingContext& ctx) {
KernelSignature SliceGradOpArgumentMapping(const ArgumentMappingContext& ctx) { KernelSignature SliceGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
if (ctx.IsDenseTensorVectorInput("Input")) { if (ctx.IsDenseTensorVectorInput("Input")) {
return KernelSignature("unregistered", {}, {}, {}); const char* starts_name = "starts";
if (ctx.HasInput("StartsTensor")) {
starts_name = "StartsTensor";
} else if (ctx.InputSize("StartsTensorList") > 0) {
starts_name = "StartsTensorList";
}
const char* ends_name = "ends";
if (ctx.HasInput("EndsTensor")) {
ends_name = "EndsTensor";
} else if (ctx.InputSize("EndsTensorList") > 0) {
ends_name = "EndsTensorList";
}
if (paddle::any_cast<std::vector<int>>(ctx.Attr("decrease_axis")).size() >
0) {
return KernelSignature("slice_array_dense_grad",
{"Input", "Out@GRAD"},
{starts_name},
{"Input@GRAD"});
} else {
return KernelSignature("slice_array_grad",
{"Input", "Out@GRAD"},
{starts_name, ends_name},
{"Input@GRAD"});
}
} }
if (ctx.HasInput("StartsTensor")) { if (ctx.HasInput("StartsTensor")) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册