未验证 提交 556f6eb0 编写于 作者: H hong 提交者: GitHub

move histogram to pten (#39496)

* move histogram to pten; test=develop

* fix format error; test=develop

* fix histogram kernel format; test=develop
上级 fb473067
......@@ -51,3 +51,5 @@ paddle/infrt/dialect/pd_ops_info.h
.lit_test_times.txt
paddle/infrt/tests/dialect/Output
paddle/infrt/tests/lit.cfg.py
paddle/fluid/pybind/eager_final_state_op_function_impl.h
paddle/fluid/pybind/tmp_eager_final_state_op_function_impl.h
......@@ -2165,6 +2165,8 @@ void OperatorWithKernel::BuildPtenKernelContext(
pt_kernel_context->EmplaceBackAttr(BOOST_GET_CONST(float, attr));
} else if (attr_defs[i].type_index == std::type_index(typeid(bool))) {
pt_kernel_context->EmplaceBackAttr(BOOST_GET_CONST(bool, attr));
} else if (attr_defs[i].type_index == std::type_index(typeid(int64_t))) {
pt_kernel_context->EmplaceBackAttr(BOOST_GET_CONST(int64_t, attr));
} else if (attr_defs[i].type_index ==
std::type_index(typeid(std::string))) {
pt_kernel_context->EmplaceBackAttr(BOOST_GET_CONST(std::string, attr));
......
......@@ -421,6 +421,8 @@ void BuildDygraphPtenKernelContext(
kernel_ctx->EmplaceBackAttr(BOOST_GET_CONST(float, attr));
} else if (attr_defs[i].type_index == std::type_index(typeid(bool))) {
kernel_ctx->EmplaceBackAttr(BOOST_GET_CONST(bool, attr));
} else if (attr_defs[i].type_index == std::type_index(typeid(int64_t))) {
kernel_ctx->EmplaceBackAttr(BOOST_GET_CONST(int64_t, attr));
} else if (attr_defs[i].type_index ==
std::type_index(typeid(std::string))) {
kernel_ctx->EmplaceBackAttr(BOOST_GET_CONST(std::string, attr));
......
......@@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/histogram_op.h"
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
......@@ -85,8 +85,3 @@ REGISTER_OPERATOR(
histogram, ops::HistogramOp, ops::HistogramOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(
histogram, ops::HistogramKernel<paddle::platform::CPUDeviceContext, float>,
ops::HistogramKernel<paddle::platform::CPUDeviceContext, double>,
ops::HistogramKernel<paddle::platform::CPUDeviceContext, int>,
ops::HistogramKernel<paddle::platform::CPUDeviceContext, int64_t>);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/operators/histogram_op.h"
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/pten/core/hostdevice.h"
namespace paddle {
namespace operators {
using IndexType = int64_t;
using Tensor = framework::Tensor;
using platform::PADDLE_CUDA_NUM_THREADS;
inline int GET_BLOCKS(const int N) {
return (N + PADDLE_CUDA_NUM_THREADS - 1) / PADDLE_CUDA_NUM_THREADS;
}
template <typename T, typename IndexType>
__device__ static IndexType GetBin(T input_value, T min_value, T max_value,
int64_t nbins) {
IndexType bin = static_cast<int>((input_value - min_value) * nbins /
(max_value - min_value));
IndexType output_index = bin < nbins - 1 ? bin : nbins - 1;
return output_index;
}
template <typename T, typename IndexType>
__global__ void KernelHistogram(const T* input, const int total_elements,
const int64_t nbins, const T min_value,
const T max_value, int64_t* output) {
extern __shared__ int64_t buf_hist[];
for (int i = threadIdx.x; i < nbins; i += blockDim.x) {
buf_hist[i] = 0;
}
__syncthreads();
CUDA_KERNEL_LOOP(input_index, total_elements) {
// const IndexType input_index = threadIdx.x + blockIdx.x * blockDim.x;
const auto input_value = input[input_index];
if (input_value >= min_value && input_value <= max_value) {
const IndexType output_index =
GetBin<T, IndexType>(input_value, min_value, max_value, nbins);
paddle::platform::CudaAtomicAdd(&buf_hist[output_index], 1);
}
}
__syncthreads();
for (int i = threadIdx.x; i < nbins; i += blockDim.x) {
paddle::platform::CudaAtomicAdd(&output[i], buf_hist[i]);
}
}
template <typename DeviceContext, typename T>
class HistogramCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(context.GetPlace()), true,
platform::errors::InvalidArgument("It must use CUDAPlace."));
const Tensor* input = context.Input<framework::Tensor>("X");
Tensor* output = context.Output<framework::Tensor>("Out");
auto& nbins = context.Attr<int64_t>("bins");
auto& minval = context.Attr<int>("min");
auto& maxval = context.Attr<int>("max");
const T* input_data = input->data<T>();
const int input_numel = input->numel();
int64_t* out_data = output->mutable_data<int64_t>(context.GetPlace());
pten::funcs::SetConstant<platform::CUDADeviceContext, int64_t>()(
context.template device_context<platform::CUDADeviceContext>(), output,
static_cast<int64_t>(0));
if (input_data == nullptr) return;
T output_min = static_cast<T>(minval);
T output_max = static_cast<T>(maxval);
if (output_min == output_max) {
auto input_x = framework::EigenVector<T>::Flatten(*input);
framework::Tensor input_min_t, input_max_t;
auto* input_min_data =
input_min_t.mutable_data<T>({1}, context.GetPlace());
auto* input_max_data =
input_max_t.mutable_data<T>({1}, context.GetPlace());
auto input_min_scala = framework::EigenScalar<T>::From(input_min_t);
auto input_max_scala = framework::EigenScalar<T>::From(input_max_t);
auto* place =
context.template device_context<DeviceContext>().eigen_device();
input_min_scala.device(*place) = input_x.minimum();
input_max_scala.device(*place) = input_x.maximum();
Tensor input_min_cpu, input_max_cpu;
paddle::framework::TensorCopySync(input_min_t, platform::CPUPlace(),
&input_min_cpu);
paddle::framework::TensorCopySync(input_max_t, platform::CPUPlace(),
&input_max_cpu);
output_min = input_min_cpu.data<T>()[0];
output_max = input_max_cpu.data<T>()[0];
}
if (output_min == output_max) {
output_min = output_min - 1;
output_max = output_max + 1;
}
PADDLE_ENFORCE_EQ(
(std::isinf(static_cast<float>(output_min)) ||
std::isnan(static_cast<float>(output_max)) ||
std::isinf(static_cast<float>(output_min)) ||
std::isnan(static_cast<float>(output_max))),
false, platform::errors::OutOfRange("range of min, max is not finite"));
PADDLE_ENFORCE_GE(
output_max, output_min,
platform::errors::InvalidArgument(
"max must be larger or equal to min. If min and max are both zero, "
"the minimum and maximum values of the data are used. "
"But received max is %d, min is %d",
maxval, minval));
auto stream =
context.template device_context<platform::CUDADeviceContext>().stream();
KernelHistogram<
T, IndexType><<<GET_BLOCKS(input_numel), PADDLE_CUDA_NUM_THREADS,
nbins * sizeof(int64_t), stream>>>(
input_data, input_numel, nbins, output_min, output_max, out_data);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
histogram,
ops::HistogramCUDAKernel<paddle::platform::CUDADeviceContext, int>,
ops::HistogramCUDAKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::HistogramCUDAKernel<paddle::platform::CUDADeviceContext, float>,
ops::HistogramCUDAKernel<paddle::platform::CUDADeviceContext, double>);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/pten/kernels/funcs/math_function.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
template <typename DeviceContext, typename T>
class HistogramKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const Tensor* input = context.Input<framework::Tensor>("X");
Tensor* output = context.Output<framework::Tensor>("Out");
auto& nbins = context.Attr<int64_t>("bins");
auto& minval = context.Attr<int>("min");
auto& maxval = context.Attr<int>("max");
const T* input_data = input->data<T>();
auto input_numel = input->numel();
int64_t* out_data = output->mutable_data<int64_t>(context.GetPlace());
pten::funcs::SetConstant<DeviceContext, int64_t>()(
context.template device_context<DeviceContext>(), output,
static_cast<int64_t>(0));
if (input_data == nullptr) return;
T output_min = static_cast<T>(minval);
T output_max = static_cast<T>(maxval);
if (output_min == output_max) {
output_min = *std::min_element(input_data, input_data + input_numel);
output_max = *std::max_element(input_data, input_data + input_numel);
}
if (output_min == output_max) {
output_min = output_min - 1;
output_max = output_max + 1;
}
PADDLE_ENFORCE_EQ(
(std::isinf(static_cast<float>(output_min)) ||
std::isnan(static_cast<float>(output_max)) ||
std::isinf(static_cast<float>(output_min)) ||
std::isnan(static_cast<float>(output_max))),
false, platform::errors::OutOfRange("range of min, max is not finite"));
PADDLE_ENFORCE_GE(
output_max, output_min,
platform::errors::InvalidArgument(
"max must be larger or equal to min. If min and max are both zero, "
"the minimum and maximum values of the data are used. "
"But received max is %d, min is %d",
maxval, minval));
for (int64_t i = 0; i < input_numel; i++) {
if (input_data[i] >= output_min && input_data[i] <= output_max) {
const int64_t bin = (int64_t)((input_data[i] - output_min) * nbins /
(output_max - output_min));
out_data[std::min(bin, nbins - 1)] += 1;
}
}
}
};
} // namespace operators
} // namespace paddle
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/pten/kernels/histogram_kernel.h"
#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/funcs/math_function.h"
namespace pten {
template <typename T, typename Context>
void HistogramKernel(const Context& dev_ctx,
const DenseTensor& input,
int64_t bins,
int min,
int max,
DenseTensor* output) {
auto& nbins = bins;
auto& minval = min;
auto& maxval = max;
const T* input_data = input.data<T>();
auto input_numel = input.numel();
int64_t* out_data = output->mutable_data<int64_t>(dev_ctx.GetPlace());
pten::funcs::SetConstant<Context, int64_t>()(
dev_ctx, output, static_cast<int64_t>(0));
if (input_data == nullptr) return;
T output_min = static_cast<T>(minval);
T output_max = static_cast<T>(maxval);
if (output_min == output_max) {
output_min = *std::min_element(input_data, input_data + input_numel);
output_max = *std::max_element(input_data, input_data + input_numel);
}
if (output_min == output_max) {
output_min = output_min - 1;
output_max = output_max + 1;
}
PADDLE_ENFORCE_EQ(
(std::isinf(static_cast<float>(output_min)) ||
std::isnan(static_cast<float>(output_max)) ||
std::isinf(static_cast<float>(output_min)) ||
std::isnan(static_cast<float>(output_max))),
false,
pten::errors::OutOfRange("range of min, max is not finite"));
PADDLE_ENFORCE_GE(
output_max,
output_min,
pten::errors::InvalidArgument(
"max must be larger or equal to min. If min and max are both zero, "
"the minimum and maximum values of the data are used. "
"But received max is %d, min is %d",
maxval,
minval));
for (int64_t i = 0; i < input_numel; i++) {
if (input_data[i] >= output_min && input_data[i] <= output_max) {
const int64_t bin = (int64_t)((input_data[i] - output_min) * nbins /
(output_max - output_min));
out_data[std::min(bin, nbins - 1)] += 1;
}
}
}
} // namespace pten
PT_REGISTER_KERNEL(histogram,
CPU,
ALL_LAYOUT,
pten::HistogramKernel,
float,
double,
int,
int64_t) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/pten/kernels/funcs/math_function.h"
#include "paddle/pten/kernels/histogram_kernel.h"
#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/pten/kernels/funcs/eigen/common.h"
#include "paddle/pten/kernels/funcs/eigen/eigen_function.h"
namespace pten {
using IndexType = int64_t;
using paddle::platform::PADDLE_CUDA_NUM_THREADS;
inline int GET_BLOCKS(const int N) {
return (N + PADDLE_CUDA_NUM_THREADS - 1) / PADDLE_CUDA_NUM_THREADS;
}
template <typename T, typename IndexType>
__device__ static IndexType GetBin(T input_value,
T min_value,
T max_value,
int64_t nbins) {
IndexType bin = static_cast<int>((input_value - min_value) * nbins /
(max_value - min_value));
IndexType output_index = bin < nbins - 1 ? bin : nbins - 1;
return output_index;
}
template <typename T, typename IndexType>
__global__ void KernelHistogram(const T* input,
const int total_elements,
const int64_t nbins,
const T min_value,
const T max_value,
int64_t* output) {
extern __shared__ int64_t buf_hist[];
for (int i = threadIdx.x; i < nbins; i += blockDim.x) {
buf_hist[i] = 0;
}
__syncthreads();
CUDA_KERNEL_LOOP(input_index, total_elements) {
// const IndexType input_index = threadIdx.x + blockIdx.x * blockDim.x;
const auto input_value = input[input_index];
if (input_value >= min_value && input_value <= max_value) {
const IndexType output_index =
GetBin<T, IndexType>(input_value, min_value, max_value, nbins);
paddle::platform::CudaAtomicAdd(&buf_hist[output_index], 1);
}
}
__syncthreads();
for (int i = threadIdx.x; i < nbins; i += blockDim.x) {
paddle::platform::CudaAtomicAdd(&output[i], buf_hist[i]);
}
}
template <typename T, typename Context>
void HistogramKernel(const Context& dev_ctx,
const DenseTensor& input,
int64_t bins,
int min,
int max,
DenseTensor* output) {
auto& nbins = bins;
auto& minval = min;
auto& maxval = max;
const T* input_data = input.data<T>();
const int input_numel = input.numel();
int64_t* out_data = output->mutable_data<int64_t>(dev_ctx.GetPlace());
pten::funcs::SetConstant<Context, int64_t>()(
dev_ctx, output, static_cast<int64_t>(0));
if (input_data == nullptr) return;
T output_min = static_cast<T>(minval);
T output_max = static_cast<T>(maxval);
if (output_min == output_max) {
auto input_x = pten::EigenVector<T>::Flatten(input);
DenseTensor input_min_t, input_max_t;
auto* input_min_data = input_min_t.mutable_data<T>({1}, dev_ctx.GetPlace());
auto* input_max_data = input_max_t.mutable_data<T>({1}, dev_ctx.GetPlace());
auto input_min_scala = pten::EigenScalar<T>::From(input_min_t);
auto input_max_scala = pten::EigenScalar<T>::From(input_max_t);
auto* place = dev_ctx.eigen_device();
input_min_scala.device(*place) = input_x.minimum();
input_max_scala.device(*place) = input_x.maximum();
DenseTensor input_min_cpu, input_max_cpu;
paddle::framework::TensorCopySync(
input_min_t, paddle::platform::CPUPlace(), &input_min_cpu);
paddle::framework::TensorCopySync(
input_max_t, paddle::platform::CPUPlace(), &input_max_cpu);
output_min = input_min_cpu.data<T>()[0];
output_max = input_max_cpu.data<T>()[0];
}
if (output_min == output_max) {
output_min = output_min - 1;
output_max = output_max + 1;
}
PADDLE_ENFORCE_EQ(
(std::isinf(static_cast<float>(output_min)) ||
std::isnan(static_cast<float>(output_max)) ||
std::isinf(static_cast<float>(output_min)) ||
std::isnan(static_cast<float>(output_max))),
false,
pten::errors::OutOfRange("range of min, max is not finite"));
PADDLE_ENFORCE_GE(
output_max,
output_min,
pten::errors::InvalidArgument(
"max must be larger or equal to min. If min and max are both zero, "
"the minimum and maximum values of the data are used. "
"But received max is %d, min is %d",
maxval,
minval));
auto stream = dev_ctx.stream();
KernelHistogram<T, IndexType><<<GET_BLOCKS(input_numel),
PADDLE_CUDA_NUM_THREADS,
nbins * sizeof(int64_t),
stream>>>(
input_data, input_numel, nbins, output_min, output_max, out_data);
}
} // namespace pten
PT_REGISTER_KERNEL(histogram,
GPU,
ALL_LAYOUT,
pten::HistogramKernel,
float,
double,
int,
int64_t) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/pten/core/dense_tensor.h"
namespace pten {
template <typename T, typename Context>
void HistogramSelectKernel(const Context& dev_ctx,
const DenseTensor& input,
int64_t bins,
int min,
int max,
DenseTensor* out);
} // namespace pten
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/pten/core/compat/op_utils.h"
namespace pten {
KernelSignature HistogramOpArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature("histogram", {"X"}, {"bins", "min", "max"}, {"Out"});
}
} // namespace pten
PT_REGISTER_ARG_MAPPING_FN(histogram, pten::HistogramOpArgumentMapping);
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册