Unverified · Commit be746adf · authored by Yuang Liu · committed by GitHub

[operator migration] Migrate kernel of unique consecutive op. (#44228)

Parent commit: f1111f3c
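For context before the diff: `unique_consecutive` keeps only the first element of every consecutive run of equal elements, and can optionally report each input element's group index (`Index`) and each group's size (`Counts`). Below is a minimal host-side sketch of those flattened semantics in plain C++ (the helper name and vector-based interface are illustrative only, not part of this patch):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Illustrative only: mirrors what UniqueConsecutiveFlattenedTensor computes
// for a 1-D input, with out/inverse/counts as plain vectors.
template <typename T>
void UniqueConsecutiveRef(const std::vector<T>& in,
                          std::vector<T>* out,
                          std::vector<int64_t>* inverse,
                          std::vector<int64_t>* counts) {
  out->clear();
  inverse->assign(in.size(), 0);
  counts->clear();
  for (size_t i = 0; i < in.size(); ++i) {
    if (out->empty() || out->back() != in[i]) {
      out->push_back(in[i]);  // first element of a new run
      counts->push_back(0);
    }
    (*inverse)[i] = static_cast<int64_t>(out->size()) - 1;
    ++counts->back();  // extend the current run
  }
}

int main() {
  std::vector<int> x = {1, 1, 2, 2, 3, 1, 1, 2};
  std::vector<int> out;
  std::vector<int64_t> inv, cnt;
  UniqueConsecutiveRef(x, &out, &inv, &cnt);
  // out = 1 2 3 1 2, cnt = 2 2 1 2 1, inv = 0 0 1 1 2 3 3 4
  for (int v : out) std::cout << v << ' ';
  std::cout << '\n';
}
```

This is the behavior the migrated CPU and GPU kernels below preserve; the patch only moves where and how it is implemented.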
@@ -12,8 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/fluid/operators/unique_consecutive_op.h"
+#include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/op_version_registry.h"
 namespace paddle {
@@ -118,11 +117,6 @@ namespace ops = paddle::operators;
 REGISTER_OP_WITHOUT_GRADIENT(unique_consecutive,
                              ops::UniqueConsecutiveOp,
                              ops::UniqueConsecutiveOpMaker);
-REGISTER_OP_CPU_KERNEL(unique_consecutive,
-                       ops::UniqueConsecutiveKernel<phi::CPUContext, float>,
-                       ops::UniqueConsecutiveKernel<phi::CPUContext, double>,
-                       ops::UniqueConsecutiveKernel<phi::CPUContext, int32_t>,
-                       ops::UniqueConsecutiveKernel<phi::CPUContext, int64_t>);
 REGISTER_OP_VERSION(unique_consecutive)
     .AddCheckpoint(
         R"ROC(
......
-/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #pragma once
-#include <algorithm>
-#include <cmath>
-#include <numeric>
-#include <set>
-#include <unordered_map>
-#include <utility>
-#include <vector>
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/concat_and_split.h"
-#include "paddle/fluid/operators/transpose_op.h"
-#include "paddle/fluid/operators/unique_op.h"
+#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/kernels/funcs/concat_and_split_functor.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/funcs/unique_functor.h"

-namespace paddle {
-namespace operators {
-template <typename InT, typename IndexT>
-static void UniqueConsecutiveFlattendTensor(
-    const framework::ExecutionContext& context,
-    const framework::Tensor& in,
-    framework::Tensor* out,
-    bool return_inverse,
-    bool return_counts) {
+namespace phi {
+template <typename InT, typename IndexT, typename Context>
+static void UniqueConsecutiveFlattenedTensor(const Context& context,
+                                             const DenseTensor& in,
+                                             DenseTensor* out,
+                                             bool return_inverse,
+                                             bool return_counts,
+                                             DenseTensor* inverse,
+                                             DenseTensor* count) {
   const InT* in_data = in.data<InT>();
   std::vector<InT> out_vec(in.numel());
   std::vector<IndexT> inverse_vec(in.numel());
@@ -65,27 +60,57 @@ static void UniqueConsecutiveFlattendTensor(
   out_vec.resize(output_size);
   out->Resize(phi::make_ddim({output_size}));
-  auto* out_data = out->mutable_data<InT>(context.GetPlace());
+  auto* out_data = context.template Alloc<InT>(out);
   std::copy(out_vec.begin(), out_vec.end(), out_data);
   if (return_inverse) {
-    auto* inverse = context.Output<framework::Tensor>("Index");
     inverse->Resize(phi::make_ddim({in.numel()}));
-    auto* inverse_data = inverse->mutable_data<IndexT>(context.GetPlace());
+    auto* inverse_data = context.template Alloc<IndexT>(inverse);
     std::copy(inverse_vec.begin(), inverse_vec.end(), inverse_data);
   }
   if (return_counts) {
-    auto* count = context.Output<framework::Tensor>("Counts");
     count->Resize(phi::make_ddim({out->numel()}));
-    auto* counts_data = count->mutable_data<IndexT>(context.GetPlace());
+    auto* counts_data = context.template Alloc<IndexT>(count);
     std::copy(counts_vec.begin(), counts_vec.end(), counts_data);
   }
 }

+template <typename Context, typename InT>
+struct UniqueConsecutiveFlattenedTensorFunctor {
+  const Context& ctx_;
+  const DenseTensor& in_;
+  DenseTensor* out_;
+  const bool return_inverse_;
+  const bool return_counts_;
+  DenseTensor* inverse_;
+  DenseTensor* count_;
+  UniqueConsecutiveFlattenedTensorFunctor(const Context& context,
+                                          const DenseTensor& in,
+                                          DenseTensor* out,
+                                          bool return_inverse,
+                                          bool return_counts,
+                                          DenseTensor* inverse,
+                                          DenseTensor* count)
+      : ctx_(context),
+        in_(in),
+        out_(out),
+        return_inverse_(return_inverse),
+        return_counts_(return_counts),
+        inverse_(inverse),
+        count_(count) {}
+  template <typename IndexT>
+  void apply() const {
+    UniqueConsecutiveFlattenedTensor<InT, IndexT, Context>(
+        ctx_, in_, out_, return_inverse_, return_counts_, inverse_, count_);
+  }
+};

-template <class ForwardIt, typename InT, typename IndexT>
+template <typename Context, class ForwardIt, typename InT, typename IndexT>
 static ForwardIt UniqueConsecutiveDimImpl(
-    const framework::ExecutionContext& context,
+    const Context& context,
     ForwardIt first,
     ForwardIt last,
     const std::vector<IndexT>& sorted_indices_vec,
@@ -104,7 +129,7 @@ static ForwardIt UniqueConsecutiveDimImpl(
   while (++first != last) {
     int64_t idx_first = std::distance(begin, first);
     int64_t idx_result = std::distance(begin, result);
-    if (!Equal<InT>(*result, *first)) {
+    if (!phi::funcs::Equal<InT>(*result, *first)) {
       if (++result != first) {
         *result = std::move(*first);
       }
@@ -116,13 +141,15 @@ static ForwardIt UniqueConsecutiveDimImpl(
   return ++result;
 }

-template <typename DeviceContext, typename InT, typename IndexT>
-static void UniqueConsecutiveDim(const framework::ExecutionContext& context,
-                                 const framework::Tensor& in,
-                                 framework::Tensor* out,
+template <typename Context, typename InT, typename IndexT>
+static void UniqueConsecutiveDim(const Context& context,
+                                 const DenseTensor& in,
+                                 DenseTensor* out,
                                  bool return_inverse,
                                  bool return_counts,
-                                 int axis) {
+                                 int axis,
+                                 DenseTensor* inverse,
+                                 DenseTensor* count) {
   // transpose tensor: eg. axis=1, [dim0, dim1, dim2] -> [dim1, dim0, dim2]
   std::vector<int> permute(in.dims().size());
   std::iota(permute.begin(), permute.end(), 0);
@@ -131,15 +158,14 @@ static void UniqueConsecutiveDim(const framework::ExecutionContext& context,
   std::vector<int64_t> in_trans_dims_vec(phi::vectorize(in.dims()));
   in_trans_dims_vec[axis] = in.dims()[0];
   in_trans_dims_vec[0] = in.dims()[axis];
-  framework::Tensor in_trans;
-  framework::DDim in_trans_dims = phi::make_ddim(in_trans_dims_vec);
+  DenseTensor in_trans;
+  DDim in_trans_dims = phi::make_ddim(in_trans_dims_vec);
   in_trans.Resize(in_trans_dims);
-  in_trans.mutable_data<InT>(context.GetPlace());
-  auto& dev_ctx = context.template device_context<DeviceContext>();
-  TransCompute<DeviceContext, InT>(
-      in.dims().size(), dev_ctx, in, &in_trans, permute);
+  context.template Alloc<InT>(&in_trans);
+  phi::funcs::TransCompute<Context, InT>(
+      in.dims().size(), context, in, &in_trans, permute);
   // reshape tensor: eg. [dim1, dim0, dim2] -> [dim1, dim0*dim2]
-  framework::DDim in_trans_flat_dims = phi::flatten_to_2d(in_trans_dims, 1);
+  DDim in_trans_flat_dims = phi::flatten_to_2d(in_trans_dims, 1);
   in_trans.Resize(in_trans_flat_dims);

   std::vector<IndexT> sorted_indices_vec(in_trans.dims()[0]);
@@ -148,21 +174,21 @@ static void UniqueConsecutiveDim(const framework::ExecutionContext& context,
   const InT* in_trans_data = in_trans.data<InT>();

   // sort tensor according to indices
-  framework::Tensor input_sorted;
+  DenseTensor input_sorted;
   input_sorted.Resize(in_trans_dims);
-  input_sorted.mutable_data<InT>(context.GetPlace());
+  context.template Alloc<InT>(&input_sorted);
   InT* input_sorted_data = input_sorted.data<InT>();
   for (size_t i = 0; i < sorted_indices_vec.size(); ++i) {
     memcpy(input_sorted_data + i * col,
            in_trans_data + static_cast<int64_t>(sorted_indices_vec[i]) * col,
            col * sizeof(InT));
   }
-  std::vector<framework::Tensor> input_unbind = Unbind(input_sorted);
+  std::vector<DenseTensor> input_unbind = phi::funcs::Unbind(input_sorted);
   std::vector<IndexT> inverse_vec(sorted_indices_vec.size(), 0);
   std::vector<IndexT> counts_vec(sorted_indices_vec.size(), 0);
-  auto last =
-      UniqueConsecutiveDimImpl<std::vector<framework::Tensor>::iterator, InT>(
-          context,
-          input_unbind.begin(),
-          input_unbind.end(),
-          sorted_indices_vec,
+  auto last = UniqueConsecutiveDimImpl<Context,
+                                       std::vector<DenseTensor>::iterator,
+                                       InT>(context,
+                                            input_unbind.begin(),
+                                            input_unbind.end(),
+                                            sorted_indices_vec,
@@ -171,117 +197,65 @@ static void UniqueConsecutiveDim(const framework::ExecutionContext& context,
   input_unbind.erase(last, input_unbind.end());
   counts_vec.erase(counts_vec.begin() + input_unbind.size(), counts_vec.end());

-  math::ConcatFunctor<DeviceContext, InT> concat_functor;
-  framework::Tensor out_trans;
+  phi::funcs::ConcatFunctor<Context, InT> concat_functor;
+  DenseTensor out_trans;
   std::vector<int64_t> out_trans_dims_vec = in_trans_dims_vec;
   out_trans_dims_vec[0] = input_unbind.size();
   out_trans.Resize(phi::make_ddim(out_trans_dims_vec));
-  out_trans.mutable_data<InT>(context.GetPlace());
+  context.template Alloc<InT>(&out_trans);
   std::swap(out_trans_dims_vec[0], out_trans_dims_vec[axis]);
   out->Resize(phi::make_ddim(out_trans_dims_vec));
-  out->mutable_data<InT>(context.GetPlace());
-  concat_functor(dev_ctx, input_unbind, 0, &out_trans);
-  TransCompute<DeviceContext, InT>(
-      out_trans.dims().size(), dev_ctx, out_trans, out, permute);
+  context.template Alloc<InT>(out);
+  concat_functor(context, input_unbind, 0, &out_trans);
+  phi::funcs::TransCompute<Context, InT>(
+      out_trans.dims().size(), context, out_trans, out, permute);
   if (return_inverse) {
-    auto* inverse = context.Output<framework::Tensor>("Index");
-    framework::TensorFromVector(inverse_vec, context.device_context(), inverse);
+    paddle::framework::TensorFromVector(inverse_vec, context, inverse);
   }
   if (return_counts) {
-    auto* count = context.Output<framework::Tensor>("Counts");
-    framework::TensorFromVector(counts_vec, context.device_context(), count);
+    paddle::framework::TensorFromVector(counts_vec, context, count);
   }
 }

-template <typename DeviceContext, typename InT>
-struct UniqueConsecutiveFlattendTensorFunctor {
-  const framework::ExecutionContext& ctx_;
-  const framework::Tensor& in_;
-  framework::Tensor* out_;
-  const bool return_inverse_;
-  const bool return_counts_;
-  UniqueConsecutiveFlattendTensorFunctor(
-      const framework::ExecutionContext& context,
-      const framework::Tensor& in,
-      framework::Tensor* out,
-      bool return_inverse,
-      bool return_counts)
-      : ctx_(context),
-        in_(in),
-        out_(out),
-        return_inverse_(return_inverse),
-        return_counts_(return_counts) {}
-  template <typename IndexT>
-  void apply() const {
-    UniqueConsecutiveFlattendTensor<InT, IndexT>(
-        ctx_, in_, out_, return_inverse_, return_counts_);
-  }
-};

-template <typename DeviceContext, typename InT>
+template <typename Context, typename InT>
 struct UniqueConsecutiveDimFunctor {
-  const framework::ExecutionContext& ctx_;
-  const framework::Tensor& in_;
-  framework::Tensor* out_;
+  const Context& ctx_;
+  const DenseTensor& in_;
+  DenseTensor* out_;
   const int axis_;
   const bool return_inverse_;
   const bool return_counts_;
+  DenseTensor* inverse_;
+  DenseTensor* count_;
-  UniqueConsecutiveDimFunctor(const framework::ExecutionContext& context,
-                              const framework::Tensor& in,
-                              framework::Tensor* out,
+  UniqueConsecutiveDimFunctor(const Context& context,
+                              const DenseTensor& in,
+                              DenseTensor* out,
                               const int axis,
                               bool return_inverse,
-                              bool return_counts)
+                              bool return_counts,
+                              DenseTensor* inverse,
+                              DenseTensor* count)
       : ctx_(context),
         in_(in),
         out_(out),
         axis_(axis),
         return_inverse_(return_inverse),
-        return_counts_(return_counts) {}
+        return_counts_(return_counts),
+        inverse_(inverse),
+        count_(count) {}
   template <typename IndexT>
   void apply() const {
-    UniqueConsecutiveDim<DeviceContext, InT, IndexT>(
-        ctx_, in_, out_, return_inverse_, return_counts_, axis_);
+    UniqueConsecutiveDim<Context, InT, IndexT>(ctx_,
+                                               in_,
+                                               out_,
+                                               return_inverse_,
+                                               return_counts_,
+                                               axis_,
+                                               inverse_,
+                                               count_);
   }
 };

-template <typename DeviceContext, typename T>
-class UniqueConsecutiveKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    auto* x = context.Input<framework::Tensor>("X");
-    auto* out = context.Output<framework::Tensor>("Out");
-    auto data_type = static_cast<framework::proto::VarType::Type>(
-        context.Attr<int>("dtype"));
-    if (data_type == framework::proto::VarType::INT32) {
-      PADDLE_ENFORCE_LE(
-          x->numel(),
-          INT_MAX,
-          platform::errors::InvalidArgument(
-              "The number of elements in Input(X) should be less than or "
-              "equal to INT_MAX, but received num is %d. Please set `dtype` to "
-              "int64.",
-              x->numel()));
-    }
-    std::vector<int> axis_vec = context.Attr<std::vector<int>>("axis");
-    bool return_inverse = context.Attr<bool>("return_inverse");
-    bool return_counts = context.Attr<bool>("return_counts");
-    if (axis_vec.empty()) {
-      framework::VisitDataTypeTiny(
-          data_type,
-          UniqueConsecutiveFlattendTensorFunctor<DeviceContext, T>(
-              context, *x, out, return_inverse, return_counts));
-    } else {
-      int axis = axis_vec[0];
-      framework::VisitDataTypeTiny(
-          data_type,
-          UniqueConsecutiveDimFunctor<DeviceContext, T>(
-              context, *x, out, axis, return_inverse, return_counts));
-    }
-  }
-};
-}  // namespace operators
-}  // namespace paddle
+}  // namespace phi
......
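A note on the functor shape above: the op's `dtype` attribute selects the index type at runtime, while `IndexT` must be a compile-time template parameter, so the patch routes dispatch through `paddle::framework::VisitDataTypeTiny`, which calls the functor's templated `apply<IndexT>()`. A self-contained sketch of that pattern, with a stand-in enum and visitor rather than Paddle's actual definitions:

```cpp
#include <cstdint>
#include <iostream>
#include <stdexcept>

// Stand-in for framework::proto::VarType::Type.
enum class IndexDType { kInt32, kInt64 };

// Stand-in for framework::VisitDataTypeTiny: turns a runtime tag into a
// compile-time template argument by calling visitor.apply<IndexT>().
template <typename Visitor>
void VisitIndexDType(IndexDType dtype, const Visitor& visitor) {
  switch (dtype) {
    case IndexDType::kInt32:
      visitor.template apply<int32_t>();
      break;
    case IndexDType::kInt64:
      visitor.template apply<int64_t>();
      break;
    default:
      throw std::invalid_argument("unsupported index dtype");
  }
}

// Same shape as the functors in this patch: data is captured in the
// constructor, the index type is supplied later by the visitor.
struct PrintIndexWidth {
  template <typename IndexT>
  void apply() const {
    std::cout << "index width: " << sizeof(IndexT) * 8 << " bits\n";
  }
};

int main() {
  VisitIndexDType(IndexDType::kInt64, PrintIndexWidth{});  // index width: 64 bits
}
```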
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/unique_consecutive_kernel.h"
#include "paddle/phi/kernels/cpu/unique_consecutive_functor.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/errors.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/fluid/framework/data_type.h"
namespace phi {
template <typename T, typename Context>
void UniqueConsecutiveKernel(const Context& dev_ctx,
const DenseTensor& x,
bool return_inverse,
bool return_counts,
const std::vector<int>& axis,
int dtype,
DenseTensor* out,
DenseTensor* index,
DenseTensor* counts) {
auto data_type = static_cast<paddle::framework::proto::VarType::Type>(dtype);
if (data_type == paddle::framework::proto::VarType::INT32) {
PADDLE_ENFORCE_LE(
x.numel(),
INT_MAX,
phi::errors::InvalidArgument(
"The number of elements in Input(X) should be less than or "
"equal to INT_MAX, but received num is %d. Please set `dtype` to "
"int64.",
x.numel()));
}
if (axis.empty()) {
paddle::framework::VisitDataTypeTiny(
data_type,
UniqueConsecutiveFlattenedTensorFunctor<Context, T>(
dev_ctx, x, out, return_inverse, return_counts, index, counts));
} else {
int valid_axis = axis[0];
paddle::framework::VisitDataTypeTiny(
data_type,
UniqueConsecutiveDimFunctor<Context, T>(dev_ctx,
x,
out,
valid_axis,
return_inverse,
return_counts,
index,
counts));
}
}
} // namespace phi
PD_REGISTER_KERNEL(unique_consecutive,
CPU,
ALL_LAYOUT,
phi::UniqueConsecutiveKernel,
float,
double,
int32_t,
int64_t) {}
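The `PADDLE_ENFORCE_LE(x.numel(), INT_MAX, ...)` guard in the kernel above exists because an int32 `dtype` means every inverse index must be representable in 32 bits. A trivial standalone illustration of the same check (the helper name here is hypothetical, not part of the patch):

```cpp
#include <climits>
#include <cstdint>
#include <stdexcept>

// Hypothetical helper: verifies that int32 inverse indices can address
// every element of an input with `numel` elements.
void CheckIndexDTypeFits(int64_t numel, bool index_is_int32) {
  if (index_is_int32 && numel > INT_MAX) {
    throw std::invalid_argument(
        "The number of elements in Input(X) should be <= INT_MAX; "
        "set `dtype` to int64.");
  }
}

int main() {
  CheckIndexDTypeFits((1LL << 31) - 1, /*index_is_int32=*/true);  // ok
  // CheckIndexDTypeFits(1LL << 31, /*index_is_int32=*/true);  // would throw
}
```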
-/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
 #include <thrust/adjacent_difference.h>
 #include <thrust/device_vector.h>
 #include <thrust/execution_policy.h>
@@ -22,159 +24,51 @@ limitations under the License. */
 #include <iostream>
 #include <vector>
-#include "paddle/fluid/framework/tensor_util.h"            // TensorToVector()
-#include "paddle/fluid/operators/unique_consecutive_op.h"  // TransComute()
+#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/tensor_utils.h"
+#include "paddle/phi/kernels/funcs/concat_and_split_functor.h"
+#include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/funcs/unique_functor.h"

-namespace paddle {
-namespace operators {
-using Tensor = framework::Tensor;
+namespace phi {

-// Binary function 'equal_to'
-template <typename InT>
-struct BinaryEqual {
-  int64_t col;
-  const InT* in_trans_data;
-  BinaryEqual(int64_t _col, const InT* _in_trans_data)
-      : col(_col), in_trans_data(_in_trans_data) {}
-  __device__ bool operator()(int64_t a, int64_t b) const {
-    for (int64_t i = 0; i < col; ++i) {
-      InT lhs = in_trans_data[i + a * col];
-      InT rhs = in_trans_data[i + b * col];
-      if (lhs != rhs) {
-        return false;
-      }
-    }
-    return true;
-  }
-};

-// Binary function 'not_equal_to'
-template <typename InT>
-struct BinaryNotEqual {
-  int64_t col;
-  const InT* in_trans_data;
-  BinaryNotEqual(int64_t _col, const InT* _in_trans_data)
-      : col(_col), in_trans_data(_in_trans_data) {}
-  __device__ bool operator()(int64_t a, int64_t b) const {
-    for (int64_t i = 0; i < col; ++i) {
-      InT lhs = in_trans_data[i + a * col];
-      InT rhs = in_trans_data[i + b * col];
-      if (lhs != rhs) {
-        return true;
-      }
-    }
-    return false;
-  }
-};

-// index_select() function for Tensor
-template <typename InT, typename IndexT>
-void IndexSelect(const framework::ExecutionContext& context,
-                 const Tensor& input,
-                 const Tensor& index,
-                 Tensor* output,
-                 int dim) {
-  auto input_dim = input.dims();
-  auto input_dim_size = input_dim.size();
-  auto output_dim = output->dims();
-  auto slice_size = 1;
-  for (auto i = dim + 1; i < input_dim_size; i++) {
-    slice_size *= input_dim[i];
-  }
-  auto input_width = slice_size * input_dim[dim];
-  auto output_width = slice_size * output_dim[dim];
-  auto outer_nums = 1;
-  for (auto i = 0; i < dim; i++) {
-    outer_nums *= input_dim[i];
-  }
-  auto index_size = index.dims()[0];
-  std::vector<InT> input_vec;
-  std::vector<IndexT> index_vec;
-  paddle::framework::TensorToVector(
-      input, context.device_context(), &input_vec);
-  paddle::framework::TensorToVector(
-      index, context.device_context(), &index_vec);
-  std::vector<InT> out_vec(output->numel());
-  for (int i = 0; i < index_size; i++) {
-    PADDLE_ENFORCE_GE(
-        index_vec[i],
-        0,
-        platform::errors::InvalidArgument(
-            "Variable value (index) of OP(index_select) "
-            "expected >= 0 and < %ld, but got %ld. Please check input "
-            "value.",
-            input_dim[dim],
-            index_vec[i]));
-    PADDLE_ENFORCE_LT(
-        index_vec[i],
-        input_dim[dim],
-        platform::errors::InvalidArgument(
-            "Variable value (index) of OP(index_select) "
-            "expected >= 0 and < %ld, but got %ld. Please check input "
-            "value.",
-            input_dim[dim],
-            index_vec[i]));
-  }
-  for (auto i = 0; i < outer_nums; i++) {
-    auto input_start_offset = i * input_width;
-    auto output_start_offset = i * output_width;
-    for (auto j = 0; j < index_size; j++) {
-      IndexT index_value = index_vec[j];
-      for (auto k = 0; k < slice_size; k++) {
-        out_vec[output_start_offset + j * slice_size + k] =
-            input_vec[input_start_offset + index_value * slice_size + k];
-      }
-    }
-  }
-  output->mutable_data<InT>(context.GetPlace());
-  framework::TensorFromVector(out_vec, context.device_context(), output);
-  output->Resize(output_dim);
-}

 // The core logic of computing Unique Consecutive for a flattend Tensor
-template <typename InT, typename IndexT, typename equal_T, typename not_equal_T>
-static void UniqueConsecutiveFlattendCUDATensor(
-    const framework::ExecutionContext& context,
-    const Tensor& in,
-    Tensor* out,
-    bool return_inverse,
-    bool return_counts,
-    equal_T equal,
-    not_equal_T not_equal,
-    int64_t num_input) {
-  // 0. Prepration
-  Tensor in_hat;
-  framework::TensorCopy(in, context.GetPlace(), &in_hat);
-  auto in_data_hat = in_hat.mutable_data<InT>(context.GetPlace());
-  Tensor sorted_indices;
-  sorted_indices.Resize(phi::make_ddim({num_input}));
-  auto sorted_indices_data =
-      sorted_indices.mutable_data<IndexT>(context.GetPlace());
+template <typename Context,
+          typename InT,
+          typename IndexT,
+          typename equal_T,
+          typename not_equal_T>
+static void UniqueConsecutiveFlattenedCUDATensor(const Context& context,
+                                                 const DenseTensor& in,
+                                                 DenseTensor* out,
+                                                 bool return_inverse,
+                                                 bool return_counts,
+                                                 equal_T equal,
+                                                 not_equal_T not_equal,
+                                                 int64_t num_input,
+                                                 DenseTensor* inverse,
+                                                 DenseTensor* counts) {
+  // 0. Preparation
+  DenseTensor in_hat;
+  phi::Copy(context, in, context.GetPlace(), false, &in_hat);
+  auto in_data_hat = context.template Alloc<InT>(&in_hat);
+  DenseTensor sorted_indices;
+  sorted_indices.Resize(phi::make_ddim({num_input}));
+  auto sorted_indices_data = context.template Alloc<IndexT>(&sorted_indices);
   thrust::sequence(
       thrust::device, sorted_indices_data, sorted_indices_data + num_input);
   // 1. Calculate op result: 'out'
-  Tensor range;
+  DenseTensor range;
   range.Resize(phi::make_ddim({num_input + 1}));
-  auto range_data_ptr = range.mutable_data<IndexT>(context.GetPlace());
+  auto range_data_ptr = context.template Alloc<IndexT>(&range);
   thrust::sequence(
       thrust::device, range_data_ptr, range_data_ptr + num_input + 1);
-  framework::TensorCopy(in_hat, context.GetPlace(), out);
+  phi::Copy(context, in_hat, context.GetPlace(), false, out);
   int num_out;
-  auto out_data = out->mutable_data<InT>(context.GetPlace());
+  auto out_data = context.template Alloc<InT>(out);
   num_out =
       thrust::unique_by_key(
           thrust::device, out_data, out_data + num_input, range_data_ptr, equal)
@@ -184,12 +78,11 @@ static void UniqueConsecutiveFlattendCUDATensor(
   // 2. Calculate inverse index: 'inverse'
   if (return_inverse) {
-    Tensor* inverse = context.Output<Tensor>("Index");
     inverse->Resize(phi::make_ddim({num_input}));
-    auto inverse_data = inverse->mutable_data<IndexT>(context.GetPlace());
-    Tensor inv_loc;
+    auto inverse_data = context.template Alloc<IndexT>(inverse);
+    DenseTensor inv_loc;
     inv_loc.Resize(phi::make_ddim({num_input}));
-    auto inv_loc_data_ptr = inv_loc.mutable_data<IndexT>(context.GetPlace());
+    auto inv_loc_data_ptr = context.template Alloc<IndexT>(&inv_loc);
     thrust::adjacent_difference(thrust::device,
                                 in_data_hat,
                                 in_data_hat + num_input,
@@ -209,9 +102,8 @@ static void UniqueConsecutiveFlattendCUDATensor(
   }
   // 3. Calculate 'counts'
   if (return_counts) {
-    Tensor* counts = context.Output<Tensor>("Counts");
     counts->Resize(phi::make_ddim({num_out}));
-    auto count_data = counts->mutable_data<IndexT>(context.GetPlace());
+    auto count_data = context.template Alloc<IndexT>(counts);
     // init 'count_data' as 0
     thrust::fill(thrust::device, count_data, count_data + num_out, 0);
     thrust::device_ptr<IndexT> range_data_ptr_dev(range_data_ptr);
@@ -223,26 +115,72 @@ static void UniqueConsecutiveFlattendCUDATensor(
   }
 }

+// functor for processing a flattend Tensor
+template <typename Context, typename InT>
+struct UniqueConsecutiveFlattenedCUDAFunctor {
+  const Context& ctx_;
+  const DenseTensor& in_;
+  DenseTensor* out_;
+  const bool return_inverse_;
+  const bool return_counts_;
+  DenseTensor* inverse_;
+  DenseTensor* count_;
+  UniqueConsecutiveFlattenedCUDAFunctor(const Context& context,
+                                        const DenseTensor& in,
+                                        DenseTensor* out,
+                                        bool return_inverse,
+                                        bool return_counts,
+                                        DenseTensor* inverse,
+                                        DenseTensor* count)
+      : ctx_(context),
+        in_(in),
+        out_(out),
+        return_inverse_(return_inverse),
+        return_counts_(return_counts),
+        inverse_(inverse),
+        count_(count) {}
+  template <typename IndexT>
+  void apply() const {
+    UniqueConsecutiveFlattenedCUDATensor<Context, InT, IndexT>(
+        ctx_,
+        in_,
+        out_,
+        return_inverse_,
+        return_counts_,
+        thrust::equal_to<InT>(),
+        thrust::not_equal_to<InT>(),
+        in_.numel(),
+        inverse_,
+        count_);
+  }
+};

 // The logic of compute unique with axis required, it's a little different
 // from above function
-template <typename InT, typename IndexT, typename equal_T, typename not_equal_T>
-static void ComputeUniqueConsecutiveDims(
-    const framework::ExecutionContext& context,
-    Tensor* sorted_indices,
-    IndexT* sorted_indices_data,
-    Tensor* out,
-    bool return_inverse,
-    bool return_counts,
-    equal_T equal,
-    not_equal_T not_equal,
-    int64_t row) {
+template <typename Context,
+          typename InT,
+          typename IndexT,
+          typename equal_T,
+          typename not_equal_T>
+static void ComputeUniqueConsecutiveDims(const Context& context,
+                                         DenseTensor* sorted_indices,
+                                         IndexT* sorted_indices_data,
+                                         DenseTensor* out,
+                                         bool return_inverse,
+                                         bool return_counts,
+                                         equal_T equal,
+                                         not_equal_T not_equal,
+                                         int64_t row,
+                                         DenseTensor* inverse,
+                                         DenseTensor* counts) {
   // 1. inverse indices: 'inverse'
-  Tensor* inverse = context.Output<Tensor>("Index");
   inverse->Resize(phi::make_ddim({row}));
-  auto inverse_data = inverse->mutable_data<IndexT>(context.GetPlace());
-  Tensor inv_loc;
+  auto inverse_data = context.template Alloc<IndexT>(inverse);
+  DenseTensor inv_loc;
   inv_loc.Resize(phi::make_ddim({row}));
-  auto inv_loc_data_ptr = inv_loc.mutable_data<IndexT>(context.GetPlace());
+  auto inv_loc_data_ptr = context.template Alloc<IndexT>(&inv_loc);
   thrust::adjacent_difference(thrust::device,
                               sorted_indices_data,
                               sorted_indices_data + row,
@@ -261,9 +199,9 @@ static void ComputeUniqueConsecutiveDims(
                      inverse_data);
   // 2. sorted indices
-  Tensor range;
+  DenseTensor range;
   range.Resize(phi::make_ddim({row + 1}));
-  auto range_data_ptr = range.mutable_data<IndexT>(context.GetPlace());
+  auto range_data_ptr = context.template Alloc<IndexT>(&range);
   thrust::sequence(thrust::device, range_data_ptr, range_data_ptr + row + 1);
   int num_out;
   num_out = thrust::unique_by_key(thrust::device,
@@ -278,23 +216,135 @@ static void ComputeUniqueConsecutiveDims(
   sorted_indices->Resize(phi::make_ddim({num_out}));
   // 3. counts: 'counts'
-  Tensor* counts = context.Output<Tensor>("Counts");
   counts->Resize(phi::make_ddim({num_out}));
-  auto count_data = counts->mutable_data<IndexT>(context.GetPlace());
+  auto count_data = context.template Alloc<IndexT>(counts);
   thrust::fill(thrust::device, count_data, count_data + row, 0);
   thrust::adjacent_difference(
       thrust::device, range_data_ptr + 1, range_data_ptr + row + 1, count_data);
 }

+// Binary function 'equal_to'
+template <typename InT>
+struct BinaryEqual {
+  int64_t col;
+  const InT* in_trans_data;
+  BinaryEqual(int64_t _col, const InT* _in_trans_data)
+      : col(_col), in_trans_data(_in_trans_data) {}
+  __device__ bool operator()(int64_t a, int64_t b) const {
+    for (int64_t i = 0; i < col; ++i) {
+      InT lhs = in_trans_data[i + a * col];
+      InT rhs = in_trans_data[i + b * col];
+      if (lhs != rhs) {
+        return false;
+      }
+    }
+    return true;
+  }
+};

+// Binary function 'not_equal_to'
+template <typename InT>
+struct BinaryNotEqual {
+  int64_t col;
+  const InT* in_trans_data;
+  BinaryNotEqual(int64_t _col, const InT* _in_trans_data)
+      : col(_col), in_trans_data(_in_trans_data) {}
+  __device__ bool operator()(int64_t a, int64_t b) const {
+    for (int64_t i = 0; i < col; ++i) {
+      InT lhs = in_trans_data[i + a * col];
+      InT rhs = in_trans_data[i + b * col];
+      if (lhs != rhs) {
+        return true;
+      }
+    }
+    return false;
+  }
+};

+// index_select() function for Tensor
+template <typename Context, typename InT, typename IndexT>
+void IndexSelect(const Context& context,
+                 const DenseTensor& input,
+                 const DenseTensor& index,
+                 DenseTensor* output,
+                 int dim) {
+  auto input_dim = input.dims();
+  auto input_dim_size = input_dim.size();
+  auto output_dim = output->dims();
+  auto slice_size = 1;
+  for (auto i = dim + 1; i < input_dim_size; i++) {
+    slice_size *= input_dim[i];
+  }
+  auto input_width = slice_size * input_dim[dim];
+  auto output_width = slice_size * output_dim[dim];
+  auto outer_nums = 1;
+  for (auto i = 0; i < dim; i++) {
+    outer_nums *= input_dim[i];
+  }
+  auto index_size = index.dims()[0];
+  std::vector<InT> input_vec;
+  std::vector<IndexT> index_vec;
+  paddle::framework::TensorToVector(input, context, &input_vec);
+  paddle::framework::TensorToVector(index, context, &index_vec);
+  std::vector<InT> out_vec(output->numel());
+  for (int i = 0; i < index_size; i++) {
+    PADDLE_ENFORCE_GE(
+        index_vec[i],
+        0,
+        phi::errors::InvalidArgument(
+            "Variable value (index) of OP(index_select) "
+            "expected >= 0 and < %ld, but got %ld. Please check input "
+            "value.",
+            input_dim[dim],
+            index_vec[i]));
+    PADDLE_ENFORCE_LT(
+        index_vec[i],
+        input_dim[dim],
+        phi::errors::InvalidArgument(
+            "Variable value (index) of OP(index_select) "
+            "expected >= 0 and < %ld, but got %ld. Please check input "
+            "value.",
+            input_dim[dim],
+            index_vec[i]));
+  }
+  for (auto i = 0; i < outer_nums; i++) {
+    auto input_start_offset = i * input_width;
+    auto output_start_offset = i * output_width;
+    for (auto j = 0; j < index_size; j++) {
+      IndexT index_value = index_vec[j];
+      for (auto k = 0; k < slice_size; k++) {
+        out_vec[output_start_offset + j * slice_size + k] =
+            input_vec[input_start_offset + index_value * slice_size + k];
+      }
+    }
+  }
+  context.template Alloc<InT>(output);
+  paddle::framework::TensorFromVector(out_vec, context, output);
+  output->Resize(output_dim);
+}

 // Calculate unique consecutive when 'axis' is set
-template <typename DeviceContext, typename InT, typename IndexT>
-static void UniqueConsecutiveDimsCUDATensor(
-    const framework::ExecutionContext& context,
-    const Tensor& in,
-    Tensor* out,
-    bool return_inverse,
-    bool return_counts,
-    int axis) {
+template <typename Context, typename InT, typename IndexT>
+static void UniqueConsecutiveDimsCUDATensor(const Context& context,
+                                            const DenseTensor& in,
+                                            DenseTensor* out,
+                                            bool return_inverse,
+                                            bool return_counts,
+                                            int axis,
+                                            DenseTensor* inverse,
+                                            DenseTensor* counts) {
   // 1. Transpose & reshape
   // Transpose tensor: eg. axis=1, [dim0, dim1, dim2] -> [dim1, dim0, dim2]
   std::vector<int> permute(in.dims().size());
@@ -304,19 +354,18 @@ static void UniqueConsecutiveDimsCUDATensor(
   std::vector<int64_t> in_trans_dims_vec(phi::vectorize(in.dims()));
   in_trans_dims_vec[axis] = in.dims()[0];
   in_trans_dims_vec[0] = in.dims()[axis];
-  framework::Tensor in_trans;
-  framework::DDim in_trans_dims = phi::make_ddim(in_trans_dims_vec);
+  DenseTensor in_trans;
+  DDim in_trans_dims = phi::make_ddim(in_trans_dims_vec);
   in_trans.Resize(in_trans_dims);
-  in_trans.mutable_data<InT>(context.GetPlace());
-  auto& dev_ctx = context.cuda_device_context();
-  TransCompute<DeviceContext, InT>(in.dims().size(),  // num of dims
-                                   dev_ctx,           // device
+  context.template Alloc<InT>(&in_trans);
+  phi::funcs::TransCompute<Context, InT>(in.dims().size(),  // num of dims
+                                         context,           // device
                                          in,          // original Tensor
                                          &in_trans,   // Tensor after reshape
                                          permute);    // index of axis
   // Reshape tensor: eg. [dim1, dim0, dim2] -> [dim1, dim0*dim2]
-  framework::DDim in_trans_flat_dims = phi::flatten_to_2d(in_trans_dims, 1);
+  DDim in_trans_flat_dims = phi::flatten_to_2d(in_trans_dims, 1);
   in_trans.Resize(in_trans_flat_dims);
   // now 'in_trans' is 2D
@@ -324,16 +373,15 @@ static void UniqueConsecutiveDimsCUDATensor(
   int64_t row = in_trans.dims()[0];
   const InT* in_trans_data = in_trans.data<InT>();
-  Tensor sorted_indices;
+  DenseTensor sorted_indices;
   sorted_indices.Resize(phi::make_ddim({row}));
-  auto sorted_indices_data =
-      sorted_indices.mutable_data<IndexT>(context.GetPlace());
+  auto sorted_indices_data = context.template Alloc<IndexT>(&sorted_indices);
   // 2. Calculate 'inverse', 'counts'
   // Init index
   thrust::sequence(
       thrust::device, sorted_indices_data, sorted_indices_data + row);
-  ComputeUniqueConsecutiveDims<InT, IndexT>(
+  ComputeUniqueConsecutiveDims<Context, InT, IndexT>(
       context,
       &sorted_indices,
       sorted_indices_data,
@@ -342,143 +390,70 @@ static void UniqueConsecutiveDimsCUDATensor(
       return_counts,
      BinaryEqual<InT>(col, in_trans_data),
      BinaryNotEqual<InT>(col, in_trans_data),
-      row);
+      row,
+      inverse,
+      counts);
   // 3. Select indices and reshape back to get 'out'
-  Tensor out_trans;
+  DenseTensor out_trans;
   std::vector<int64_t> out_trans_dims_vec = in_trans_dims_vec;
   out_trans_dims_vec[0] = sorted_indices.numel();
   out_trans.Resize(phi::make_ddim(out_trans_dims_vec));
-  out_trans.mutable_data<InT>(context.GetPlace());
-  IndexSelect<InT, IndexT>(context, in_trans, sorted_indices, &out_trans, 0);
+  context.template Alloc<InT>(&out_trans);
+  IndexSelect<Context, InT, IndexT>(
+      context, in_trans, sorted_indices, &out_trans, 0);
   std::swap(out_trans_dims_vec[0], out_trans_dims_vec[axis]);
   out->Resize(phi::make_ddim(out_trans_dims_vec));
-  out->mutable_data<InT>(context.GetPlace());
-  std::vector<framework::Tensor> out_trans_unbind = Unbind(out_trans);
-  math::ConcatFunctor<DeviceContext, InT> concat_functor;
-  concat_functor(dev_ctx, out_trans_unbind, 0, &out_trans);
-  TransCompute<DeviceContext, InT>(
-      out_trans.dims().size(), dev_ctx, out_trans, out, permute);
+  context.template Alloc<InT>(out);
+  std::vector<DenseTensor> out_trans_unbind = phi::funcs::Unbind(out_trans);
+  phi::funcs::ConcatFunctor<Context, InT> concat_functor;
+  concat_functor(context, out_trans_unbind, 0, &out_trans);
+  phi::funcs::TransCompute<Context, InT>(
+      out_trans.dims().size(), context, out_trans, out, permute);
 }

-// functor for processing a flattend Tensor
-template <typename DeviceContext, typename InT>
-struct UniqueConsecutiveFlattendCUDAFunctor {
-  const framework::ExecutionContext& ctx_;
-  const Tensor& in_;
-  Tensor* out_;
-  const bool return_inverse_;
-  const bool return_counts_;
-  UniqueConsecutiveFlattendCUDAFunctor(
-      const framework::ExecutionContext& context,
-      const Tensor& in,
-      Tensor* out,
-      bool return_inverse,
-      bool return_counts)
-      : ctx_(context),
-        in_(in),
-        out_(out),
-        return_inverse_(return_inverse),
-        return_counts_(return_counts) {}
-  template <typename IndexT>
-  void apply() const {
-    UniqueConsecutiveFlattendCUDATensor<InT, IndexT>(
-        ctx_,
-        in_,
-        out_,
-        return_inverse_,
-        return_counts_,
-        thrust::equal_to<InT>(),
-        thrust::not_equal_to<InT>(),
-        in_.numel());
-  }
-};

 // functor for processing a multi-dimentional Tensor
-template <typename DeviceContext, typename InT>
+template <typename Context, typename InT>
 struct UniqueConsecutiveDimsCUDAFunctor {
-  const framework::ExecutionContext& ctx_;
-  const Tensor& in_;
-  Tensor* out_;
+  const Context& ctx_;
+  const DenseTensor& in_;
+  DenseTensor* out_;
   const int axis_;
   const bool return_inverse_;
   const bool return_counts_;
+  DenseTensor* inverse_;
+  DenseTensor* count_;
-  UniqueConsecutiveDimsCUDAFunctor(const framework::ExecutionContext& context,
-                                   const Tensor& in,
-                                   Tensor* out,
+  UniqueConsecutiveDimsCUDAFunctor(const Context& context,
+                                   const DenseTensor& in,
+                                   DenseTensor* out,
                                    const int axis,
                                    bool return_inverse,
-                                   bool return_counts)
+                                   bool return_counts,
+                                   DenseTensor* inverse,
+                                   DenseTensor* count)
       : ctx_(context),
         in_(in),
         out_(out),
         axis_(axis),
         return_inverse_(return_inverse),
-        return_counts_(return_counts) {}
+        return_counts_(return_counts),
+        inverse_(inverse),
+        count_(count) {}
   template <typename IndexT>
   void apply() const {
-    UniqueConsecutiveDimsCUDATensor<DeviceContext, InT, IndexT>(
-        ctx_, in_, out_, return_inverse_, return_counts_, axis_);
+    UniqueConsecutiveDimsCUDATensor<Context, InT, IndexT>(ctx_,
+                                                          in_,
+                                                          out_,
+                                                          return_inverse_,
+                                                          return_counts_,
+                                                          axis_,
+                                                          inverse_,
+                                                          count_);
   }
 };

-// Unique_Consecutive_op CUDA implementation.
-template <typename InT>
-class UniqueConsecutiveKernel<platform::CUDADeviceContext, InT>
-    : public framework::OpKernel<InT> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    auto* x = context.Input<framework::Tensor>("X");
-    auto* out = context.Output<framework::Tensor>("Out");
-    auto data_type = static_cast<framework::proto::VarType::Type>(
-        context.Attr<int>("dtype"));
-    if (data_type == framework::proto::VarType::INT32) {
-      PADDLE_ENFORCE_LE(
-          x->numel() + 1,
-          INT_MAX,
-          platform::errors::InvalidArgument(
-              "The number of elements in Input(X) should be less than or "
-              "equal to INT_MAX, but received num is %d. Please set `dtype` to "
-              "int64.",
-              x->numel()));
-    }
-    std::vector<int> axis_vec = context.Attr<std::vector<int>>("axis");
-    bool return_inverse = context.Attr<bool>("return_inverse");
-    bool return_counts = context.Attr<bool>("return_counts");
-    // if 'axis' is not required, flatten the Tensor.
-    if (axis_vec.empty()) {
-      framework::VisitDataTypeTiny(
-          data_type,
-          UniqueConsecutiveFlattendCUDAFunctor<platform::CUDADeviceContext,
-                                               InT>(
-              context, *x, out, return_inverse, return_counts));
-    } else {
-      // 'axis' is required.
-      int axis = axis_vec[0];
-      framework::VisitDataTypeTiny(
-          data_type,
-          UniqueConsecutiveDimsCUDAFunctor<platform::CUDADeviceContext, InT>(
-              context, *x, out, axis, return_inverse, return_counts));
-    }
-  }
-};
-}  // namespace operators
-}  // namespace paddle

-namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(
-    unique_consecutive,
-    ops::UniqueConsecutiveKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::UniqueConsecutiveKernel<paddle::platform::CUDADeviceContext, double>,
-    ops::UniqueConsecutiveKernel<paddle::platform::CUDADeviceContext, int32_t>,
-    ops::UniqueConsecutiveKernel<paddle::platform::CUDADeviceContext, int64_t>);
+}  // namespace phi
......
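One detail of the CUDA path worth spelling out: `thrust::unique_by_key` compacts the data while dragging along a `0..num_input` index sequence, so the surviving values in `range_data_ptr` are the start offsets of each run; with `num_input` kept as a sentinel one past the end, adjacent differences of those offsets are exactly the run lengths written to `counts`. A host-side sketch of that bookkeeping with plain loops (the device code does the same with `thrust::sequence`, `thrust::unique_by_key`, and `thrust::adjacent_difference`):

```cpp
#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  std::vector<int> data = {1, 1, 2, 2, 3, 1, 1, 2};
  const int64_t n = static_cast<int64_t>(data.size());

  // Pair each element with its position, as the kernel does with
  // thrust::sequence before unique_by_key.
  std::vector<int64_t> pos(n);
  std::iota(pos.begin(), pos.end(), 0);

  // Keep the position of the first element of every consecutive run
  // (what survives in range_data_ptr after unique_by_key).
  std::vector<int64_t> boundaries;
  for (int64_t i = 0; i < n; ++i) {
    if (i == 0 || data[i] != data[i - 1]) boundaries.push_back(pos[i]);
  }
  boundaries.push_back(n);  // sentinel: one past the last run

  // counts[k] = boundaries[k+1] - boundaries[k], i.e. adjacent differences
  // over the surviving boundaries (step 3 of the flattened CUDA kernel).
  std::vector<int64_t> counts(boundaries.size() - 1);
  for (size_t k = 0; k + 1 < boundaries.size(); ++k) {
    counts[k] = boundaries[k + 1] - boundaries[k];
  }
  for (int64_t c : counts) std::cout << c << ' ';  // 2 2 1 2 1
  std::cout << '\n';
}
```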
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/unique_consecutive_kernel.h"
#include "paddle/phi/kernels/gpu/unique_consecutive_functor.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/errors.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/fluid/framework/data_type.h"
namespace phi {
template <typename T, typename Context>
void UniqueConsecutiveKernel(const Context& dev_ctx,
const DenseTensor& x,
bool return_inverse,
bool return_counts,
const std::vector<int>& axis,
int dtype,
DenseTensor* out,
DenseTensor* index,
DenseTensor* counts) {
auto data_type = static_cast<paddle::framework::proto::VarType::Type>(dtype);
if (data_type == paddle::framework::proto::VarType::INT32) {
PADDLE_ENFORCE_LE(
x.numel() + 1,
INT_MAX,
phi::errors::InvalidArgument(
"The number of elements in Input(X) should be less than or "
"equal to INT_MAX, but received num is %d. Please set `dtype` to "
"int64.",
x.numel()));
}
// if 'axis' is not required, flatten the Tensor.
if (axis.empty()) {
paddle::framework::VisitDataTypeTiny(
data_type,
UniqueConsecutiveFlattenedCUDAFunctor<Context, T>(
dev_ctx, x, out, return_inverse, return_counts, index, counts));
} else {
// 'axis' is required.
int valid_axis = axis[0];
paddle::framework::VisitDataTypeTiny(
data_type,
UniqueConsecutiveDimsCUDAFunctor<Context, T>(dev_ctx,
x,
out,
valid_axis,
return_inverse,
return_counts,
index,
counts));
}
}
} // namespace phi
PD_REGISTER_KERNEL(unique_consecutive,
GPU,
ALL_LAYOUT,
phi::UniqueConsecutiveKernel,
float,
double,
int32_t,
int64_t) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename T, typename Context>
void UniqueConsecutiveKernel(const Context& dev_ctx,
const DenseTensor& x,
bool return_inverse,
bool return_counts,
const std::vector<int>& axis,
int dtype,
DenseTensor* out,
DenseTensor* index,
DenseTensor* counts);
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/core/compat/op_utils.h"
namespace phi {
KernelSignature UniqueConsecutiveOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature("unique_consecutive",
{"X"},
{"return_inverse", "return_counts", "axis", "dtype"},
{"Out", "Index", "Counts"});
}
} // namespace phi
PD_REGISTER_ARG_MAPPING_FN(unique_consecutive,
phi::UniqueConsecutiveOpArgumentMapping);