Unverified Commit 875cfd57 Authored by: duanboqiang Committed by: GitHub

add unique_consecutive_op (#34334)

* add unique_consecutive_op

* add unique_consecutive_op

* add unique_consecutive_op

* add unique_consecutive_op

* add unique_consecutive_op

* add unique_consecutive_op

* add unique_consecutive_op

* add unique_consecutive_op

* remove unity build

* add unique_consecutive op

* add unique_consecutive op

* add enable static

* add noqa

* add space line

* add default case.

* add comma

* add space line

* modify unique_consecutive unittest

* optimize ut coverage

* rebase develop

* improve coverage

* update en docs

* update en docs

* update en docs

* update en docs

* update en docs

* update en doc
Parent e29c2d12
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/unique_consecutive_op.h"
#include "paddle/fluid/framework/op_version_registry.h"
namespace paddle {
namespace operators {
class UniqueConsecutiveOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "unique_consecutive");
OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out",
"unique_consecutive");
auto in_dims = ctx->GetInputDim("X");
bool return_inverse = ctx->Attrs().Get<bool>("return_inverse");
bool return_counts = ctx->Attrs().Get<bool>("return_counts");
auto axis_vec = ctx->Attrs().Get<std::vector<int>>("axis");
if (return_inverse) {
OP_INOUT_CHECK(ctx->HasOutput("Index"), "Output", "Index",
"unique_consecutive");
}
if (return_counts) {
OP_INOUT_CHECK(ctx->HasOutput("Counts"), "Output", "Counts",
"unique_consecutive");
}
if (axis_vec.empty()) {
ctx->SetOutputDim("Out", {-1});
if (return_inverse) {
ctx->SetOutputDim("Index", {framework::product(in_dims)});
}
} else {
int axis = axis_vec[0];
if (axis < 0) {
axis += in_dims.size();
}
PADDLE_ENFORCE_LT(
axis, in_dims.size(),
platform::errors::InvalidArgument("The axis(%d) should be less than "
"the dimension size(%d) of x.",
axis, in_dims.size()));
auto out_dims = in_dims;
out_dims[axis] = -1;
ctx->SetOutputDim("Out", out_dims);
if (return_inverse) {
ctx->SetOutputDim("Index", {in_dims[axis]});
}
}
if (return_counts) {
ctx->SetOutputDim("Counts", {-1});
}
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace());
}
};
class UniqueConsecutiveOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "The input tensor of unique_consecutive op.");
AddAttr<int>("dtype",
"(int, default 5(FP32)) "
"data type for output index")
.SetDefault(framework::proto::VarType::FP32);
AddOutput("Out", "A unique consecutive subsequence for input tensor.");
AddOutput("Index",
"The indices for where elements in the original input ended up "
"in the returned unique tensor.")
.AsDispensable();
AddOutput("Counts", "The counts for each unique element.").AsDispensable();
AddAttr<bool>(
"return_inverse",
"If True, also return the indices for where elements"
" in the original input ended up in the returned unique tensor.")
.SetDefault(false);
AddAttr<bool>("return_counts",
"If True, also return the counts for each unique element.")
.SetDefault(false);
AddAttr<std::vector<int>>(
"axis",
"The axis to apply unique. If None, the input will be flattened.")
.SetDefault({});
AddComment(R"DOC(
This function is different from paddle.unique() in the sense that this
function only eliminates consecutive duplicate values.
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(unique_consecutive, ops::UniqueConsecutiveOp,
ops::UniqueConsecutiveOpMaker);
REGISTER_OP_CPU_KERNEL(
unique_consecutive,
ops::UniqueConsecutiveKernel<paddle::platform::CPUDeviceContext, float>,
ops::UniqueConsecutiveKernel<paddle::platform::CPUDeviceContext, double>,
ops::UniqueConsecutiveKernel<paddle::platform::CPUDeviceContext, int32_t>,
ops::UniqueConsecutiveKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_VERSION(unique_consecutive)
.AddCheckpoint(
R"ROC(
        Upgrade unique_consecutive, add 2 outputs [Index, Counts] and 3 attributes
        [return_inverse, return_counts, axis].
)ROC",
paddle::framework::compatible::OpVersionDesc()
.NewOutput("Counts", "The counts for each unique element.")
.NewAttr("return_inverse",
"If True, also return the indices for where elements"
" in the original input ended up in the returned unique "
"tensor.",
false)
.NewAttr("return_counts",
"If True, also return the counts for each unique element.",
false)
.NewAttr("axis",
"The axis to apply unique. If None, the input will be "
"flattened.",
std::vector<int>{}));
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <thrust/adjacent_difference.h>
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/functional.h>
#include <thrust/scatter.h>
#include <thrust/sequence.h>
#include <thrust/unique.h>
#include <iostream>
#include <vector>
#include "paddle/fluid/framework/tensor_util.h" // TensorToVector()
#include "paddle/fluid/operators/unique_consecutive_op.h" // TransComute()
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
// Binary function 'equal_to'
template <typename InT>
struct BinaryEqual {
int64_t col;
const InT* in_trans_data;
BinaryEqual(int64_t _col, const InT* _in_trans_data)
: col(_col), in_trans_data(_in_trans_data) {}
__device__ bool operator()(int64_t a, int64_t b) const {
for (int64_t i = 0; i < col; ++i) {
InT lhs = in_trans_data[i + a * col];
InT rhs = in_trans_data[i + b * col];
if (lhs != rhs) {
return false;
}
}
return true;
}
};
// Binary function 'not_equal_to'
template <typename InT>
struct BinaryNotEqual {
int64_t col;
const InT* in_trans_data;
BinaryNotEqual(int64_t _col, const InT* _in_trans_data)
: col(_col), in_trans_data(_in_trans_data) {}
__device__ bool operator()(int64_t a, int64_t b) const {
for (int64_t i = 0; i < col; ++i) {
InT lhs = in_trans_data[i + a * col];
InT rhs = in_trans_data[i + b * col];
if (lhs != rhs) {
return true;
}
}
return false;
}
};
// index_select() function for Tensor
template <typename InT, typename IndexT>
void IndexSelect(const framework::ExecutionContext& context,
const Tensor& input, const Tensor& index, Tensor* output,
int dim) {
auto input_dim = input.dims();
auto input_dim_size = input_dim.size();
auto output_dim = output->dims();
auto slice_size = 1;
for (auto i = dim + 1; i < input_dim_size; i++) {
slice_size *= input_dim[i];
}
auto input_width = slice_size * input_dim[dim];
auto output_width = slice_size * output_dim[dim];
auto outer_nums = 1;
for (auto i = 0; i < dim; i++) {
outer_nums *= input_dim[i];
}
auto index_size = index.dims()[0];
std::vector<InT> input_vec;
std::vector<IndexT> index_vec;
TensorToVector(input, context.device_context(), &input_vec);
TensorToVector(index, context.device_context(), &index_vec);
std::vector<InT> out_vec(output->numel());
for (int i = 0; i < index_size; i++) {
PADDLE_ENFORCE_GE(
index_vec[i], 0,
platform::errors::InvalidArgument(
"Variable value (index) of OP(index_select) "
"expected >= 0 and < %ld, but got %ld. Please check input "
"value.",
input_dim[dim], index_vec[i]));
PADDLE_ENFORCE_LT(
index_vec[i], input_dim[dim],
platform::errors::InvalidArgument(
"Variable value (index) of OP(index_select) "
"expected >= 0 and < %ld, but got %ld. Please check input "
"value.",
input_dim[dim], index_vec[i]));
}
for (auto i = 0; i < outer_nums; i++) {
auto input_start_offset = i * input_width;
auto output_start_offset = i * output_width;
for (auto j = 0; j < index_size; j++) {
IndexT index_value = index_vec[j];
for (auto k = 0; k < slice_size; k++) {
out_vec[output_start_offset + j * slice_size + k] =
input_vec[input_start_offset + index_value * slice_size + k];
}
}
}
output->mutable_data<InT>(context.GetPlace());
framework::TensorFromVector(out_vec, context.device_context(), output);
output->Resize(output_dim);
}
// The core logic of computing Unique Consecutive for a flattened Tensor
template <typename InT, typename IndexT, typename equal_T, typename not_equal_T>
static void UniqueConsecutiveFlattendCUDATensor(
const framework::ExecutionContext& context, const Tensor& in, Tensor* out,
bool return_inverse, bool return_counts, equal_T equal,
not_equal_T not_equal, int64_t num_input) {
  // 0. Preparation
Tensor in_hat;
framework::TensorCopy(in, context.GetPlace(), &in_hat);
auto in_data_hat = in_hat.mutable_data<InT>(context.GetPlace());
Tensor sorted_indices;
sorted_indices.Resize(framework::make_ddim({num_input}));
auto sorted_indices_data =
sorted_indices.mutable_data<IndexT>(context.GetPlace());
thrust::sequence(thrust::device, sorted_indices_data,
sorted_indices_data + num_input);
// 1. Calculate op result: 'out'
Tensor range;
range.Resize(framework::make_ddim({num_input + 1}));
auto range_data_ptr = range.mutable_data<IndexT>(context.GetPlace());
thrust::sequence(thrust::device, range_data_ptr,
range_data_ptr + num_input + 1);
framework::TensorCopy(in_hat, context.GetPlace(), out);
int num_out;
auto out_data = out->mutable_data<InT>(context.GetPlace());
num_out = thrust::unique_by_key(thrust::device, out_data,
out_data + num_input, range_data_ptr, equal)
.first -
out_data;
out->Resize(framework::make_ddim({num_out}));
// 2. Calculate inverse index: 'inverse'
if (return_inverse) {
Tensor* inverse = context.Output<Tensor>("Index");
inverse->Resize(framework::make_ddim({num_input}));
auto inverse_data = inverse->mutable_data<IndexT>(context.GetPlace());
Tensor inv_loc;
inv_loc.Resize(framework::make_ddim({num_input}));
auto inv_loc_data_ptr = inv_loc.mutable_data<IndexT>(context.GetPlace());
thrust::adjacent_difference(thrust::device, in_data_hat,
in_data_hat + num_input, inv_loc_data_ptr,
not_equal);
thrust::device_ptr<IndexT> inv_loc_data_dev(inv_loc_data_ptr);
inv_loc_data_dev[0] = 0; // without device_ptr, segmentation fault
thrust::inclusive_scan(thrust::device, inv_loc_data_ptr,
inv_loc_data_ptr + num_input, inv_loc_data_ptr);
thrust::scatter(thrust::device, inv_loc_data_ptr,
inv_loc_data_ptr + num_input, sorted_indices_data,
inverse_data);
}
// 3. Calculate 'counts'
if (return_counts) {
Tensor* counts = context.Output<Tensor>("Counts");
counts->Resize(framework::make_ddim({num_out}));
auto count_data = counts->mutable_data<IndexT>(context.GetPlace());
// init 'count_data' as 0
thrust::fill(thrust::device, count_data, count_data + num_out, 0);
thrust::device_ptr<IndexT> range_data_ptr_dev(range_data_ptr);
range_data_ptr_dev[num_out] = num_input;
thrust::adjacent_difference(thrust::device, range_data_ptr + 1,
range_data_ptr + num_out + 1, count_data);
}
}
// The logic of computing unique consecutive when 'axis' is specified;
// it differs slightly from the function above
template <typename InT, typename IndexT, typename equal_T, typename not_equal_T>
static void ComputeUniqueConsecutiveDims(
const framework::ExecutionContext& context, Tensor* sorted_indices,
IndexT* sorted_indices_data, Tensor* out, bool return_inverse,
bool return_counts, equal_T equal, not_equal_T not_equal, int64_t row) {
// 1. inverse indices: 'inverse'
Tensor* inverse = context.Output<Tensor>("Index");
inverse->Resize(framework::make_ddim({row}));
auto inverse_data = inverse->mutable_data<IndexT>(context.GetPlace());
Tensor inv_loc;
inv_loc.Resize(framework::make_ddim({row}));
auto inv_loc_data_ptr = inv_loc.mutable_data<IndexT>(context.GetPlace());
thrust::adjacent_difference(thrust::device, sorted_indices_data,
sorted_indices_data + row, inv_loc_data_ptr,
not_equal);
thrust::device_ptr<IndexT> inv_loc_data_dev(inv_loc_data_ptr);
inv_loc_data_dev[0] = 0;
thrust::inclusive_scan(thrust::device, inv_loc_data_ptr,
inv_loc_data_ptr + row, inv_loc_data_ptr);
thrust::scatter(thrust::device, inv_loc_data_ptr, inv_loc_data_ptr + row,
sorted_indices_data, inverse_data);
// 2. sorted indices
Tensor range;
range.Resize(framework::make_ddim({row + 1}));
auto range_data_ptr = range.mutable_data<IndexT>(context.GetPlace());
thrust::sequence(thrust::device, range_data_ptr, range_data_ptr + row + 1);
int num_out;
num_out =
thrust::unique_by_key(thrust::device, sorted_indices_data,
sorted_indices_data + row, range_data_ptr, equal)
.first -
sorted_indices_data;
thrust::device_ptr<IndexT> range_data_ptr_dev(range_data_ptr);
range_data_ptr_dev[num_out] = row;
sorted_indices->Resize(framework::make_ddim({num_out}));
// 3. counts: 'counts'
Tensor* counts = context.Output<Tensor>("Counts");
counts->Resize(framework::make_ddim({num_out}));
auto count_data = counts->mutable_data<IndexT>(context.GetPlace());
  thrust::fill(thrust::device, count_data, count_data + num_out, 0);
thrust::adjacent_difference(thrust::device, range_data_ptr + 1,
range_data_ptr + row + 1, count_data);
}
// Calculate unique consecutive when 'axis' is set
template <typename DeviceContext, typename InT, typename IndexT>
static void UniqueConsecutiveDimsCUDATensor(
const framework::ExecutionContext& context, const Tensor& in, Tensor* out,
bool return_inverse, bool return_counts, int axis) {
// 1. Transpose & reshape
// Transpose tensor: eg. axis=1, [dim0, dim1, dim2] -> [dim1, dim0, dim2]
std::vector<int> permute(in.dims().size());
std::iota(permute.begin(), permute.end(), 0);
permute[axis] = 0;
permute[0] = axis;
std::vector<int64_t> in_trans_dims_vec(framework::vectorize(in.dims()));
in_trans_dims_vec[axis] = in.dims()[0];
in_trans_dims_vec[0] = in.dims()[axis];
framework::Tensor in_trans;
framework::DDim in_trans_dims = framework::make_ddim(in_trans_dims_vec);
in_trans.Resize(in_trans_dims);
in_trans.mutable_data<InT>(context.GetPlace());
auto& dev_ctx = context.cuda_device_context();
TransCompute<DeviceContext, InT>(in.dims().size(), // num of dims
dev_ctx, // device
in, // original Tensor
&in_trans, // Tensor after reshape
permute); // index of axis
// Reshape tensor: eg. [dim1, dim0, dim2] -> [dim1, dim0*dim2]
framework::DDim in_trans_flat_dims =
framework::flatten_to_2d(in_trans_dims, 1);
in_trans.Resize(in_trans_flat_dims);
// now 'in_trans' is 2D
int64_t col = in_trans.dims()[1];
int64_t row = in_trans.dims()[0];
const InT* in_trans_data = in_trans.data<InT>();
Tensor sorted_indices;
sorted_indices.Resize(framework::make_ddim({row}));
auto sorted_indices_data =
sorted_indices.mutable_data<IndexT>(context.GetPlace());
// 2. Calculate 'inverse', 'counts'
// Init index
thrust::sequence(thrust::device, sorted_indices_data,
sorted_indices_data + row);
ComputeUniqueConsecutiveDims<InT, IndexT>(
context, &sorted_indices, sorted_indices_data, out, return_inverse,
return_counts, BinaryEqual<InT>(col, in_trans_data),
BinaryNotEqual<InT>(col, in_trans_data), row);
// 3. Select indices and reshape back to get 'out'
Tensor out_trans;
std::vector<int64_t> out_trans_dims_vec = in_trans_dims_vec;
out_trans_dims_vec[0] = sorted_indices.numel();
out_trans.Resize(framework::make_ddim(out_trans_dims_vec));
out_trans.mutable_data<InT>(context.GetPlace());
IndexSelect<InT, IndexT>(context, in_trans, sorted_indices, &out_trans, 0);
std::swap(out_trans_dims_vec[0], out_trans_dims_vec[axis]);
out->Resize(framework::make_ddim(out_trans_dims_vec));
out->mutable_data<InT>(context.GetPlace());
std::vector<framework::Tensor> out_trans_unbind = Unbind(out_trans);
math::ConcatFunctor<DeviceContext, InT> concat_functor;
concat_functor(dev_ctx, out_trans_unbind, 0, &out_trans);
TransCompute<DeviceContext, InT>(out_trans.dims().size(), dev_ctx, out_trans,
out, permute);
}
// functor for processing a flattened Tensor
template <typename DeviceContext, typename InT>
struct UniqueConsecutiveFlattendCUDAFunctor {
const framework::ExecutionContext& ctx_;
const Tensor& in_;
Tensor* out_;
const bool return_inverse_;
const bool return_counts_;
UniqueConsecutiveFlattendCUDAFunctor(
const framework::ExecutionContext& context, const Tensor& in, Tensor* out,
bool return_inverse, bool return_counts)
: ctx_(context),
in_(in),
out_(out),
return_inverse_(return_inverse),
return_counts_(return_counts) {}
template <typename IndexT>
void apply() const {
UniqueConsecutiveFlattendCUDATensor<InT, IndexT>(
ctx_, in_, out_, return_inverse_, return_counts_,
thrust::equal_to<InT>(), thrust::not_equal_to<InT>(), in_.numel());
}
};
// functor for processing a multi-dimensional Tensor
template <typename DeviceContext, typename InT>
struct UniqueConsecutiveDimsCUDAFunctor {
const framework::ExecutionContext& ctx_;
const Tensor& in_;
Tensor* out_;
const int axis_;
const bool return_inverse_;
const bool return_counts_;
UniqueConsecutiveDimsCUDAFunctor(const framework::ExecutionContext& context,
const Tensor& in, Tensor* out,
const int axis, bool return_inverse,
bool return_counts)
: ctx_(context),
in_(in),
out_(out),
axis_(axis),
return_inverse_(return_inverse),
return_counts_(return_counts) {}
template <typename IndexT>
void apply() const {
UniqueConsecutiveDimsCUDATensor<DeviceContext, InT, IndexT>(
ctx_, in_, out_, return_inverse_, return_counts_, axis_);
}
};
// CUDA implementation of the unique_consecutive op.
template <typename InT>
class UniqueConsecutiveKernel<platform::CUDADeviceContext, InT>
: public framework::OpKernel<InT> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* x = context.Input<framework::Tensor>("X");
auto* out = context.Output<framework::Tensor>("Out");
auto data_type = static_cast<framework::proto::VarType::Type>(
context.Attr<int>("dtype"));
if (data_type == framework::proto::VarType::INT32) {
PADDLE_ENFORCE_LE(
x->numel() + 1, INT_MAX,
platform::errors::InvalidArgument(
"The number of elements in Input(X) should be less than or "
"equal to INT_MAX, but received num is %d. Please set `dtype` to "
"int64.",
x->numel()));
}
std::vector<int> axis_vec = context.Attr<std::vector<int>>("axis");
bool return_inverse = context.Attr<bool>("return_inverse");
bool return_counts = context.Attr<bool>("return_counts");
    // If 'axis' is not specified, flatten the Tensor.
if (axis_vec.empty()) {
framework::VisitDataTypeTiny(
data_type,
UniqueConsecutiveFlattendCUDAFunctor<platform::CUDADeviceContext,
InT>(
context, *x, out, return_inverse, return_counts));
} else {
      // 'axis' is specified.
int axis = axis_vec[0];
framework::VisitDataTypeTiny(
data_type,
UniqueConsecutiveDimsCUDAFunctor<platform::CUDADeviceContext, InT>(
context, *x, out, axis, return_inverse, return_counts));
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
unique_consecutive,
ops::UniqueConsecutiveKernel<paddle::platform::CUDADeviceContext, float>,
ops::UniqueConsecutiveKernel<paddle::platform::CUDADeviceContext, double>,
ops::UniqueConsecutiveKernel<paddle::platform::CUDADeviceContext, int32_t>,
ops::UniqueConsecutiveKernel<paddle::platform::CUDADeviceContext, int64_t>);
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <cmath>
#include <numeric>
#include <set>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/concat_and_split.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/transpose_op.h"
#include "paddle/fluid/operators/unique_op.h"
namespace paddle {
namespace operators {
template <typename InT, typename IndexT>
static void UniqueConsecutiveFlattendTensor(
const framework::ExecutionContext& context, const framework::Tensor& in,
framework::Tensor* out, bool return_inverse, bool return_counts) {
const InT* in_data = in.data<InT>();
std::vector<InT> out_vec(in.numel());
std::vector<IndexT> inverse_vec(in.numel());
std::vector<IndexT> counts_vec(in.numel());
memcpy(out_vec.data(), in_data, in.numel() * sizeof(InT));
InT* p = out_vec.data();
int64_t last = 0;
IndexT* q = counts_vec.data();
for (int64_t i = 0; i < in.numel(); i++) {
if (in_data[i] != *p) {
*(++p) = in_data[i];
if (return_counts) {
*(q++) = i - last;
last = i;
}
}
if (return_inverse) {
inverse_vec[i] = p - out_vec.data();
}
}
int64_t output_size = p - out_vec.data() + 1;
if (return_counts) {
*q = in.numel() - last;
counts_vec.resize(output_size);
}
out_vec.resize(output_size);
out->Resize(framework::make_ddim({output_size}));
auto* out_data = out->mutable_data<InT>(context.GetPlace());
std::copy(out_vec.begin(), out_vec.end(), out_data);
if (return_inverse) {
auto* inverse = context.Output<framework::Tensor>("Index");
inverse->Resize(framework::make_ddim({in.numel()}));
auto* inverse_data = inverse->mutable_data<IndexT>(context.GetPlace());
std::copy(inverse_vec.begin(), inverse_vec.end(), inverse_data);
}
if (return_counts) {
auto* count = context.Output<framework::Tensor>("Counts");
count->Resize(framework::make_ddim({out->numel()}));
auto* counts_data = count->mutable_data<IndexT>(context.GetPlace());
std::copy(counts_vec.begin(), counts_vec.end(), counts_data);
}
}
template <class ForwardIt, typename InT, typename IndexT>
static ForwardIt UniqueConsecutiveDimImpl(
const framework::ExecutionContext& context, ForwardIt first, ForwardIt last,
const std::vector<IndexT>& sorted_indices_vec,
std::vector<IndexT>* inverse_vec, std::vector<IndexT>* counts_vec) {
if (first == last) {
return last;
}
(*inverse_vec)[sorted_indices_vec[0]] = 0;
(*counts_vec)[0] = 1;
ForwardIt begin = first;
ForwardIt result = first;
while (++first != last) {
int64_t idx_first = std::distance(begin, first);
int64_t idx_result = std::distance(begin, result);
if (!Equal<InT>(*result, *first)) {
if (++result != first) {
*result = std::move(*first);
}
idx_result += 1;
}
(*inverse_vec)[sorted_indices_vec[idx_first]] = idx_result;
(*counts_vec)[idx_result] += 1;
}
return ++result;
}
template <typename DeviceContext, typename InT, typename IndexT>
static void UniqueConsecutiveDim(const framework::ExecutionContext& context,
const framework::Tensor& in,
framework::Tensor* out, bool return_inverse,
bool return_counts, int axis) {
// transpose tensor: eg. axis=1, [dim0, dim1, dim2] -> [dim1, dim0, dim2]
std::vector<int> permute(in.dims().size());
std::iota(permute.begin(), permute.end(), 0);
permute[axis] = 0;
permute[0] = axis;
std::vector<int64_t> in_trans_dims_vec(framework::vectorize(in.dims()));
in_trans_dims_vec[axis] = in.dims()[0];
in_trans_dims_vec[0] = in.dims()[axis];
framework::Tensor in_trans;
framework::DDim in_trans_dims = framework::make_ddim(in_trans_dims_vec);
in_trans.Resize(in_trans_dims);
in_trans.mutable_data<InT>(context.GetPlace());
auto& dev_ctx = context.template device_context<DeviceContext>();
TransCompute<DeviceContext, InT>(in.dims().size(), dev_ctx, in, &in_trans,
permute);
// reshape tensor: eg. [dim1, dim0, dim2] -> [dim1, dim0*dim2]
framework::DDim in_trans_flat_dims =
framework::flatten_to_2d(in_trans_dims, 1);
in_trans.Resize(in_trans_flat_dims);
std::vector<IndexT> sorted_indices_vec(in_trans.dims()[0]);
std::iota(sorted_indices_vec.begin(), sorted_indices_vec.end(), 0);
int64_t col = in_trans.dims()[1];
const InT* in_trans_data = in_trans.data<InT>();
// sort tensor according to indices
framework::Tensor input_sorted;
input_sorted.Resize(in_trans_dims);
input_sorted.mutable_data<InT>(context.GetPlace());
InT* input_sorted_data = input_sorted.data<InT>();
for (size_t i = 0; i < sorted_indices_vec.size(); ++i) {
memcpy(input_sorted_data + i * col,
in_trans_data + static_cast<int64_t>(sorted_indices_vec[i]) * col,
col * sizeof(InT));
}
std::vector<framework::Tensor> input_unbind = Unbind(input_sorted);
std::vector<IndexT> inverse_vec(sorted_indices_vec.size(), 0);
std::vector<IndexT> counts_vec(sorted_indices_vec.size(), 0);
auto last =
UniqueConsecutiveDimImpl<std::vector<framework::Tensor>::iterator, InT>(
context, input_unbind.begin(), input_unbind.end(), sorted_indices_vec,
&inverse_vec, &counts_vec);
input_unbind.erase(last, input_unbind.end());
counts_vec.erase(counts_vec.begin() + input_unbind.size(), counts_vec.end());
math::ConcatFunctor<DeviceContext, InT> concat_functor;
framework::Tensor out_trans;
std::vector<int64_t> out_trans_dims_vec = in_trans_dims_vec;
out_trans_dims_vec[0] = input_unbind.size();
out_trans.Resize(framework::make_ddim(out_trans_dims_vec));
out_trans.mutable_data<InT>(context.GetPlace());
std::swap(out_trans_dims_vec[0], out_trans_dims_vec[axis]);
out->Resize(framework::make_ddim(out_trans_dims_vec));
out->mutable_data<InT>(context.GetPlace());
concat_functor(dev_ctx, input_unbind, 0, &out_trans);
TransCompute<DeviceContext, InT>(out_trans.dims().size(), dev_ctx, out_trans,
out, permute);
if (return_inverse) {
auto* inverse = context.Output<framework::Tensor>("Index");
framework::TensorFromVector(inverse_vec, context.device_context(), inverse);
}
if (return_counts) {
auto* count = context.Output<framework::Tensor>("Counts");
framework::TensorFromVector(counts_vec, context.device_context(), count);
}
}
template <typename DeviceContext, typename InT>
struct UniqueConsecutiveFlattendTensorFunctor {
const framework::ExecutionContext& ctx_;
const framework::Tensor& in_;
framework::Tensor* out_;
const bool return_inverse_;
const bool return_counts_;
UniqueConsecutiveFlattendTensorFunctor(
const framework::ExecutionContext& context, const framework::Tensor& in,
framework::Tensor* out, bool return_inverse, bool return_counts)
: ctx_(context),
in_(in),
out_(out),
return_inverse_(return_inverse),
return_counts_(return_counts) {}
template <typename IndexT>
void apply() const {
UniqueConsecutiveFlattendTensor<InT, IndexT>(
ctx_, in_, out_, return_inverse_, return_counts_);
}
};
template <typename DeviceContext, typename InT>
struct UniqueConsecutiveDimFunctor {
const framework::ExecutionContext& ctx_;
const framework::Tensor& in_;
framework::Tensor* out_;
const int axis_;
const bool return_inverse_;
const bool return_counts_;
UniqueConsecutiveDimFunctor(const framework::ExecutionContext& context,
const framework::Tensor& in,
framework::Tensor* out, const int axis,
bool return_inverse, bool return_counts)
: ctx_(context),
in_(in),
out_(out),
axis_(axis),
return_inverse_(return_inverse),
return_counts_(return_counts) {}
template <typename IndexT>
void apply() const {
UniqueConsecutiveDim<DeviceContext, InT, IndexT>(
ctx_, in_, out_, return_inverse_, return_counts_, axis_);
}
};
template <typename DeviceContext, typename T>
class UniqueConsecutiveKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* x = context.Input<framework::Tensor>("X");
auto* out = context.Output<framework::Tensor>("Out");
auto data_type = static_cast<framework::proto::VarType::Type>(
context.Attr<int>("dtype"));
if (data_type == framework::proto::VarType::INT32) {
PADDLE_ENFORCE_LE(
x->numel(), INT_MAX,
platform::errors::InvalidArgument(
"The number of elements in Input(X) should be less than or "
"equal to INT_MAX, but received num is %d. Please set `dtype` to "
"int64.",
x->numel()));
}
std::vector<int> axis_vec = context.Attr<std::vector<int>>("axis");
bool return_inverse = context.Attr<bool>("return_inverse");
bool return_counts = context.Attr<bool>("return_counts");
if (axis_vec.empty()) {
framework::VisitDataTypeTiny(
data_type, UniqueConsecutiveFlattendTensorFunctor<DeviceContext, T>(
context, *x, out, return_inverse, return_counts));
} else {
int axis = axis_vec[0];
framework::VisitDataTypeTiny(
data_type,
UniqueConsecutiveDimFunctor<DeviceContext, T>(
context, *x, out, axis, return_inverse, return_counts));
}
}
};
} // namespace operators
} // namespace paddle
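When 'axis' is specified, the CPU and CUDA paths share the same shape manipulation: move the target axis to the front, flatten the remaining dimensions so each slice becomes one comparable row, drop consecutive duplicate rows, and transpose back. A compact NumPy sketch of that axis path (the helper name is ours, for illustration):

import numpy as np

def unique_consecutive_axis_sketch(x, axis):
    x = np.moveaxis(np.asarray(x), axis, 0)   # transpose the target axis to the front
    rows = x.reshape(x.shape[0], -1)          # flatten to 2D: one row per slice
    keep = np.empty(rows.shape[0], dtype=bool)
    keep[0] = True
    keep[1:] = np.any(rows[1:] != rows[:-1], axis=1)  # row-wise BinaryNotEqual
    return np.moveaxis(x[keep], 0, axis)      # keep first row of each run, transpose back

x = np.array([[2, 1, 3], [3, 0, 1], [2, 1, 3], [2, 1, 3]])
print(unique_consecutive_axis_sketch(x, 0))
# [[2 1 3]
#  [3 0 1]
#  [2 1 3]]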
@@ -86,6 +86,7 @@ std::map<std::string, std::set<std::string>> op_outs_map = {
     {"Y", "MeanOut", "VarianceOut", "SavedMean", "SavedVariance",
      "ReserveSpace"}},
    {"unique", {"Out", "Index", "Indices", "Counts"}},
    {"unique_consecutive", {"Out", "Index", "Counts"}},
    {"generate_proposals", {"RpnRois", "RpnRoiProbs", "RpnRoisNum"}},
    {"collect_fpn_proposals", {"FpnRois", "RoisNum"}},
    {"matrix_nms", {"Out", "Index", "RoisNum"}},
@@ -142,6 +142,7 @@ from .tensor.manipulation import squeeze_  # noqa: F401
from .tensor.manipulation import stack  # noqa: F401
from .tensor.manipulation import strided_slice  # noqa: F401
from .tensor.manipulation import unique  # noqa: F401
from .tensor.manipulation import unique_consecutive  # noqa: F401
from .tensor.manipulation import unsqueeze  # noqa: F401
from .tensor.manipulation import unsqueeze_  # noqa: F401
from .tensor.manipulation import unstack  # noqa: F401
@@ -470,6 +471,7 @@ __all__ = [  # noqa
    'randn',
    'strided_slice',
    'unique',
    'unique_consecutive',
    'set_cuda_rng_state',
    'set_printoptions',
    'std',
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
from op_test import OpTest
import paddle.fluid.core as core
import paddle
import paddle.fluid as fluid
import paddle.fluid.framework as framework
def reference_unique_consecutive(X, return_inverse=False, return_counts=False):
"""
    Reference unique_consecutive implementation using Python.
    Args:
        X (Tensor): the input tensor. Its data type should be float32, float64, int32 or int64.
        return_inverse(bool, optional): If True, also return the indices for where elements in
            the original input ended up in the returned unique consecutive tensor. Default is False.
        return_counts(bool, optional): If True, also return the counts for each unique consecutive element.
            Default is False.
"""
X = list(X)
counts_vec = [1] * len(X)
i = 0
counts = 1
last = 0
inverse_vec = [0] * len(X)
inverse_vec[last] = i
cnt = 0
while i < len(X) - 1:
if X[i] == X[i + 1]:
if return_counts:
counts_vec[cnt] += 1
del X[i]
else:
i += 1
cnt += 1
if return_inverse:
last += 1
inverse_vec[last] = i
if return_counts:
counts_vec = counts_vec[:len(X)]
if return_inverse and return_counts:
return X, np.array(inverse_vec), np.array(counts_vec)
elif return_counts:
return X, np.array(counts_vec)
elif return_inverse:
return X, np.array(inverse_vec)
else:
return X
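# The same semantics can be cross-checked with itertools.groupby, which groups
# equal consecutive elements directly. This compact equivalent is illustrative
# only and is not used by the tests below.
from itertools import groupby

def groupby_reference(x):
    groups = [(k, len(list(g))) for k, g in groupby(x)]  # (value, run length) pairs
    out = [k for k, _ in groups]
    counts = [c for _, c in groups]
    inverse = [i for i, (_, c) in enumerate(groups) for _ in range(c)]
    return out, inverse, counts

# groupby_reference([1, 1, 2, 2, 3, 1, 1, 2])
# -> ([1, 2, 3, 1, 2], [0, 0, 1, 1, 2, 3, 3, 4], [2, 2, 1, 2, 1])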
class TestUniqueConsecutiveOp(OpTest):
"""case 1"""
def config(self):
self.x_size = 100
self.x_range = 20
self.return_inverse = False
self.return_counts = False
def init_kernel_type(self):
self.dtype = "float32" if core.is_compiled_with_rocm() else "float64"
def setUp(self):
self.init_kernel_type()
self.config()
self.op_type = "unique_consecutive"
x = np.random.randint(self.x_range, size=self.x_size).astype(self.dtype)
result = reference_unique_consecutive(x, self.return_inverse,
self.return_counts)
out = reference_unique_consecutive(x)
out = np.array(out).astype(self.dtype)
self.inputs = {'X': x, }
self.attrs = {'dtype': int(core.VarDesc.VarType.INT32)}
self.outputs = {'Out': out, }
def test_check_output(self):
self.check_output()
class TestUniqueConsecutiveOp2(TestUniqueConsecutiveOp):
"""case 2"""
def config(self):
self.x_size = 100
self.x_range = 20
self.return_inverse = True
self.return_counts = False
def setUp(self):
self.init_kernel_type()
self.config()
self.op_type = "unique_consecutive"
x = np.random.randint(self.x_range, size=self.x_size).astype(self.dtype)
result, inverse = reference_unique_consecutive(x, self.return_inverse,
self.return_counts)
result = np.array(result).astype(self.dtype)
inverse = inverse.astype(self.dtype)
self.inputs = {'X': x, }
self.attrs = {
'return_inverse': self.return_inverse,
'dtype': int(core.VarDesc.VarType.INT32)
}
self.outputs = {'Out': result, 'Index': inverse}
class TestUniqueConsecutiveOp3(TestUniqueConsecutiveOp):
"""case 3"""
def config(self):
self.x_size = 100
self.x_range = 20
self.return_inverse = False
self.return_counts = True
def setUp(self):
self.init_kernel_type()
self.config()
self.op_type = "unique_consecutive"
x = np.random.randint(self.x_range, size=self.x_size).astype(self.dtype)
result, counts = reference_unique_consecutive(x, self.return_inverse,
self.return_counts)
result = np.array(result).astype(self.dtype)
counts = counts.astype(self.dtype)
self.inputs = {'X': x, }
self.attrs = {
'return_counts': self.return_counts,
'dtype': int(core.VarDesc.VarType.INT32)
}
self.outputs = {'Out': result, 'Counts': counts}
class TestUniqueConsecutiveOp4(TestUniqueConsecutiveOp):
"""case 4"""
def config(self):
self.x_size = 100
self.x_range = 20
self.return_inverse = True
self.return_counts = True
def setUp(self):
self.init_kernel_type()
self.config()
self.op_type = "unique_consecutive"
x = np.random.randint(self.x_range, size=self.x_size).astype(self.dtype)
result, inverse, counts = reference_unique_consecutive(
x, self.return_inverse, self.return_counts)
result = np.array(result).astype(self.dtype)
inverse = inverse.astype(self.dtype)
counts = counts.astype(self.dtype)
self.inputs = {'X': x, }
self.attrs = {
'return_inverse': self.return_inverse,
'return_counts': self.return_counts,
'dtype': int(core.VarDesc.VarType.INT32)
}
self.outputs = {'Out': result, 'Index': inverse, 'Counts': counts}
class TestUniqueConsecutiveAPI(unittest.TestCase):
def setUp(self):
self.places = [fluid.CPUPlace()]
if core.is_compiled_with_cuda():
self.places.append(fluid.CUDAPlace(0))
def check_static_result(self, place):
with fluid.program_guard(fluid.Program(), fluid.Program()):
paddle.enable_static()
input_x = fluid.data(name="input_x", shape=[100, ], dtype="float32")
result = paddle.unique_consecutive(input_x)
x_np = np.random.randint(20, size=100).astype("float32")
exe = fluid.Executor(place)
fetches = exe.run(fluid.default_main_program(),
feed={"input_x": x_np},
fetch_list=[result])
def test_static(self):
for place in self.places:
self.check_static_result(place=place)
def test_dygraph(self):
for place in self.places:
with fluid.dygraph.guard(place):
input_x = np.random.randint(20, size=100).astype("float64")
x = paddle.to_tensor(input_x)
result = paddle.unique_consecutive(x)
class TestUniqueConsecutiveCase2API(unittest.TestCase):
def setUp(self):
self.places = [fluid.CPUPlace()]
if core.is_compiled_with_cuda():
self.places.append(fluid.CUDAPlace(0))
def check_static_result(self, place):
with fluid.program_guard(fluid.Program(), fluid.Program()):
paddle.enable_static()
input_x = fluid.data(name="input_x", shape=[100, ], dtype="float32")
result, inverse, counts = paddle.unique_consecutive(
input_x, return_inverse=True, return_counts=True)
x_np = np.random.randint(20, size=100).astype("float32")
exe = fluid.Executor(place)
fetches = exe.run(fluid.default_main_program(),
feed={"input_x": x_np},
fetch_list=[result])
def test_static(self):
for place in self.places:
self.check_static_result(place=place)
def test_dygraph(self):
for place in self.places:
with fluid.dygraph.guard(place):
input_x = np.random.randint(20, size=100).astype("float64")
x = paddle.to_tensor(input_x)
result, inverse, counts = paddle.unique_consecutive(
x, return_inverse=True, return_counts=True)
if __name__ == "__main__":
paddle.enable_static()
unittest.main()
@@ -88,6 +88,7 @@ from .manipulation import squeeze_  # noqa: F401
from .manipulation import stack  # noqa: F401
from .manipulation import strided_slice  # noqa: F401
from .manipulation import unique  # noqa: F401
from .manipulation import unique_consecutive  # noqa: F401
from .manipulation import unsqueeze  # noqa: F401
from .manipulation import unsqueeze_  # noqa: F401
from .manipulation import unstack  # noqa: F401
@@ -333,6 +334,7 @@ tensor_method_func = [  #noqa
    'strided_slice',
    'transpose',
    'unique',
    'unique_consecutive',
    'unsqueeze',
    'unsqueeze_',
    'unstack',
@@ -717,6 +717,112 @@ def squeeze_(x, axis=None, name=None):
    return out
def unique_consecutive(x,
return_inverse=False,
return_counts=False,
axis=None,
dtype="int64",
name=None):
r"""
Eliminates all but the first element from every consecutive group of equivalent elements.
    .. note:: This function is different from :func:`paddle.unique` in the sense that this function
        only eliminates consecutive duplicate values. The semantics are similar to `std::unique` in C++.
Args:
        x(Tensor): the input tensor. Its data type should be float32, float64, int32 or int64.
return_inverse(bool, optional): If True, also return the indices for where elements in
the original input ended up in the returned unique consecutive tensor. Default is False.
return_counts(bool, optional): If True, also return the counts for each unique consecutive element.
Default is False.
axis(int, optional): The axis to apply unique consecutive. If None, the input will be flattened.
Default is None.
        dtype(np.dtype|str, optional): The data type of the `inverse` tensor: int32 or int64.
            Default: int64.
name(str, optional): Name for the operation. For more information, please refer to
:ref:`api_guide_Name`. Default is None.
Returns:
tuple: (out, inverse, counts). `out` is the unique consecutive tensor for `x`. `inverse` is provided only if `return_inverse` is True. `counts` is provided only if `return_counts` is True.
Example:
.. code-block:: python
import paddle
x = paddle.to_tensor([1, 1, 2, 2, 3, 1, 1, 2])
            output = paddle.unique_consecutive(x)
            np_output = output.numpy() # [1 2 3 1 2]
            _, inverse, counts = paddle.unique_consecutive(x, return_inverse=True, return_counts=True)
            np_inverse = inverse.numpy() # [0 0 1 1 2 3 3 4]
            np_counts = counts.numpy() # [2 2 1 2 1]

            x = paddle.to_tensor([[2, 1, 3], [3, 0, 1], [2, 1, 3], [2, 1, 3]])
            output = paddle.unique_consecutive(x, axis=0)
            np_output = output.numpy()
            # [[2 1 3]
            #  [3 0 1]
            #  [2 1 3]]
"""
if axis is None:
axis = []
else:
axis = [axis]
attr_dtype = convert_np_dtype_to_dtype_(dtype)
if in_dygraph_mode():
out, inverse, counts = core.ops.unique_consecutive(
x, 'dtype', attr_dtype, 'return_inverse', return_inverse,
'return_counts', return_counts, 'axis', axis)
outs = [out]
if return_inverse:
outs.append(inverse)
if return_counts:
outs.append(counts)
if len(outs) == 1:
return outs[0]
return tuple(outs)
check_variable_and_dtype(x, "input",
['float32', 'float64', 'int32', 'int64'],
'unique_consecutive')
check_type(return_inverse, 'return_inverse', bool, 'unique_consecutive')
check_type(return_counts, 'return_counts', bool, 'unique_consecutive')
check_dtype(dtype, 'dtype', ['int32', 'int64'], 'unique_consecutive')
if len(axis) != 0:
check_type(axis[0], 'axis', int, 'unique_consecutive')
helper = LayerHelper('unique_consecutive', **locals())
attrs = {
'dtype': attr_dtype,
"return_inverse": return_inverse,
"return_counts": return_counts,
"axis": axis,
}
out = helper.create_variable_for_type_inference(
dtype=x.dtype, stop_gradient=True)
inverse = helper.create_variable_for_type_inference(
dtype=attr_dtype, stop_gradient=True)
counts = helper.create_variable_for_type_inference(
dtype=attr_dtype, stop_gradient=True)
outputs = {"Out": out, "Index": inverse, "Counts": counts}
outs = [out]
if return_inverse:
outs.append(inverse)
if return_counts:
outs.append(counts)
helper.append_op(
type="unique_consecutive",
inputs={"X": x},
attrs=attrs,
outputs=outputs)
if len(outs) == 1:
return outs[0]
return tuple(outs)
def unique(x,
           return_index=False,
           return_inverse=False,