提交 5076ad87 编写于 作者: P phlrain

move histogram to pten; test=develop

上级 74a150fe
...@@ -51,3 +51,5 @@ paddle/infrt/dialect/pd_ops_info.h ...@@ -51,3 +51,5 @@ paddle/infrt/dialect/pd_ops_info.h
.lit_test_times.txt .lit_test_times.txt
paddle/infrt/tests/dialect/Output paddle/infrt/tests/dialect/Output
paddle/infrt/tests/lit.cfg.py paddle/infrt/tests/lit.cfg.py
paddle/fluid/pybind/eager_final_state_op_function_impl.h
paddle/fluid/pybind/tmp_eager_final_state_op_function_impl.h
...@@ -2155,6 +2155,8 @@ void OperatorWithKernel::BuildPtenKernelContext( ...@@ -2155,6 +2155,8 @@ void OperatorWithKernel::BuildPtenKernelContext(
pt_kernel_context->EmplaceBackAttr(BOOST_GET_CONST(float, attr)); pt_kernel_context->EmplaceBackAttr(BOOST_GET_CONST(float, attr));
} else if (attr_defs[i].type_index == std::type_index(typeid(bool))) { } else if (attr_defs[i].type_index == std::type_index(typeid(bool))) {
pt_kernel_context->EmplaceBackAttr(BOOST_GET_CONST(bool, attr)); pt_kernel_context->EmplaceBackAttr(BOOST_GET_CONST(bool, attr));
} else if (attr_defs[i].type_index == std::type_index(typeid(int64_t))) {
pt_kernel_context->EmplaceBackAttr(BOOST_GET_CONST(int64_t, attr));
} else if (attr_defs[i].type_index == } else if (attr_defs[i].type_index ==
std::type_index(typeid(std::string))) { std::type_index(typeid(std::string))) {
pt_kernel_context->EmplaceBackAttr(BOOST_GET_CONST(std::string, attr)); pt_kernel_context->EmplaceBackAttr(BOOST_GET_CONST(std::string, attr));
......
...@@ -412,6 +412,8 @@ void BuildDygraphPtenKernelContext( ...@@ -412,6 +412,8 @@ void BuildDygraphPtenKernelContext(
kernel_ctx->EmplaceBackAttr(BOOST_GET_CONST(float, attr)); kernel_ctx->EmplaceBackAttr(BOOST_GET_CONST(float, attr));
} else if (attr_defs[i].type_index == std::type_index(typeid(bool))) { } else if (attr_defs[i].type_index == std::type_index(typeid(bool))) {
kernel_ctx->EmplaceBackAttr(BOOST_GET_CONST(bool, attr)); kernel_ctx->EmplaceBackAttr(BOOST_GET_CONST(bool, attr));
} else if (attr_defs[i].type_index == std::type_index(typeid(int64_t))) {
kernel_ctx->EmplaceBackAttr(BOOST_GET_CONST(int64_t, attr));
} else if (attr_defs[i].type_index == } else if (attr_defs[i].type_index ==
std::type_index(typeid(std::string))) { std::type_index(typeid(std::string))) {
kernel_ctx->EmplaceBackAttr(BOOST_GET_CONST(std::string, attr)); kernel_ctx->EmplaceBackAttr(BOOST_GET_CONST(std::string, attr));
......
...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/histogram_op.h" #include "paddle/fluid/framework/op_registry.h"
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
...@@ -85,8 +85,8 @@ REGISTER_OPERATOR( ...@@ -85,8 +85,8 @@ REGISTER_OPERATOR(
histogram, ops::HistogramOp, ops::HistogramOpMaker, histogram, ops::HistogramOp, ops::HistogramOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL( // REGISTER_OP_CPU_KERNEL(
histogram, ops::HistogramKernel<paddle::platform::CPUDeviceContext, float>, // histogram, ops::HistogramKernel<paddle::platform::CPUDeviceContext, float>,
ops::HistogramKernel<paddle::platform::CPUDeviceContext, double>, // ops::HistogramKernel<paddle::platform::CPUDeviceContext, double>,
ops::HistogramKernel<paddle::platform::CPUDeviceContext, int>, // ops::HistogramKernel<paddle::platform::CPUDeviceContext, int>,
ops::HistogramKernel<paddle::platform::CPUDeviceContext, int64_t>); // ops::HistogramKernel<paddle::platform::CPUDeviceContext, int64_t>);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License"); #include "paddle/pten/kernels/histogram_kernel.h"
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/pten/kernels/funcs/math_function.h"
namespace paddle { #include "paddle/pten/backends/cpu/cpu_context.h"
namespace operators { #include "paddle/pten/core/kernel_registry.h"
using Tensor = framework::Tensor;
template <typename DeviceContext, typename T> namespace pten {
class HistogramKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const Tensor* input = context.Input<framework::Tensor>("X");
Tensor* output = context.Output<framework::Tensor>("Out");
auto& nbins = context.Attr<int64_t>("bins");
auto& minval = context.Attr<int>("min");
auto& maxval = context.Attr<int>("max");
const T* input_data = input->data<T>(); template <typename T, typename Context>
auto input_numel = input->numel(); void HistogramKernel(const Context& dev_ctx,
const DenseTensor& input,
int64_t bins,
int min,
int max,
DenseTensor* output)
{
auto& nbins = bins;
auto& minval = min;
auto& maxval = max;
int64_t* out_data = output->mutable_data<int64_t>(context.GetPlace()); const T* input_data = input.data<T>();
pten::funcs::SetConstant<DeviceContext, int64_t>()( auto input_numel = input.numel();
context.template device_context<DeviceContext>(), output,
int64_t* out_data = output->mutable_data<int64_t>(dev_ctx.GetPlace());
pten::funcs::SetConstant<Context, int64_t>()(
dev_ctx, output,
static_cast<int64_t>(0)); static_cast<int64_t>(0));
if (input_data == nullptr) return; if (input_data == nullptr) return;
...@@ -61,10 +46,10 @@ class HistogramKernel : public framework::OpKernel<T> { ...@@ -61,10 +46,10 @@ class HistogramKernel : public framework::OpKernel<T> {
std::isnan(static_cast<float>(output_max)) || std::isnan(static_cast<float>(output_max)) ||
std::isinf(static_cast<float>(output_min)) || std::isinf(static_cast<float>(output_min)) ||
std::isnan(static_cast<float>(output_max))), std::isnan(static_cast<float>(output_max))),
false, platform::errors::OutOfRange("range of min, max is not finite")); false, pten::errors::OutOfRange("range of min, max is not finite"));
PADDLE_ENFORCE_GE( PADDLE_ENFORCE_GE(
output_max, output_min, output_max, output_min,
platform::errors::InvalidArgument( pten::errors::InvalidArgument(
"max must be larger or equal to min. If min and max are both zero, " "max must be larger or equal to min. If min and max are both zero, "
"the minimum and maximum values of the data are used. " "the minimum and maximum values of the data are used. "
"But received max is %d, min is %d", "But received max is %d, min is %d",
...@@ -77,8 +62,16 @@ class HistogramKernel : public framework::OpKernel<T> { ...@@ -77,8 +62,16 @@ class HistogramKernel : public framework::OpKernel<T> {
out_data[std::min(bin, nbins - 1)] += 1; out_data[std::min(bin, nbins - 1)] += 1;
} }
} }
} }
};
} // namspace pten
} // namespace operators PT_REGISTER_KERNEL(histogram,
} // namespace paddle CPU,
ALL_LAYOUT,
pten::HistogramKernel,
float,
double,
int,
int64_t) {}
\ No newline at end of file
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 #include "paddle/pten/kernels/histogram_kernel.h"
#include "paddle/pten/kernels/funcs/math_function.h"
Unless required by applicable law or agreed to in writing, software #include "paddle/pten/backends/gpu/gpu_context.h"
distributed under the License is distributed on an "AS IS" BASIS, #include "paddle/pten/core/kernel_registry.h"
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/operators/histogram_op.h"
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/pten/core/hostdevice.h"
namespace paddle { #include "paddle/pten/kernels/funcs/eigen/eigen_function.h"
namespace operators { #include "paddle/pten/kernels/funcs/eigen/common.h"
namespace pten {
using IndexType = int64_t; using IndexType = int64_t;
using Tensor = framework::Tensor; using paddle::platform::PADDLE_CUDA_NUM_THREADS;
using platform::PADDLE_CUDA_NUM_THREADS;
inline int GET_BLOCKS(const int N) { inline int GET_BLOCKS(const int N) {
return (N + PADDLE_CUDA_NUM_THREADS - 1) / PADDLE_CUDA_NUM_THREADS; return (N + PADDLE_CUDA_NUM_THREADS - 1) / PADDLE_CUDA_NUM_THREADS;
...@@ -64,26 +56,24 @@ __global__ void KernelHistogram(const T* input, const int total_elements, ...@@ -64,26 +56,24 @@ __global__ void KernelHistogram(const T* input, const int total_elements,
} }
} }
template <typename DeviceContext, typename T> template <typename T, typename Context>
class HistogramCUDAKernel : public framework::OpKernel<T> { void HistogramKernel(const Context& dev_ctx,
public: const DenseTensor& input,
void Compute(const framework::ExecutionContext& context) const override { int64_t bins,
PADDLE_ENFORCE_EQ( int min,
platform::is_gpu_place(context.GetPlace()), true, int max,
platform::errors::InvalidArgument("It must use CUDAPlace.")); DenseTensor* output)
{
const Tensor* input = context.Input<framework::Tensor>("X"); auto& nbins = bins;
Tensor* output = context.Output<framework::Tensor>("Out"); auto& minval = min;
auto& nbins = context.Attr<int64_t>("bins"); auto& maxval = max;
auto& minval = context.Attr<int>("min");
auto& maxval = context.Attr<int>("max"); const T* input_data = input.data<T>();
const int input_numel = input.numel();
const T* input_data = input->data<T>();
const int input_numel = input->numel(); int64_t* out_data = output->mutable_data<int64_t>(dev_ctx.GetPlace());
pten::funcs::SetConstant<Context, int64_t>()(
int64_t* out_data = output->mutable_data<int64_t>(context.GetPlace()); dev_ctx, output,
pten::funcs::SetConstant<platform::CUDADeviceContext, int64_t>()(
context.template device_context<platform::CUDADeviceContext>(), output,
static_cast<int64_t>(0)); static_cast<int64_t>(0));
if (input_data == nullptr) return; if (input_data == nullptr) return;
...@@ -92,25 +82,25 @@ class HistogramCUDAKernel : public framework::OpKernel<T> { ...@@ -92,25 +82,25 @@ class HistogramCUDAKernel : public framework::OpKernel<T> {
T output_max = static_cast<T>(maxval); T output_max = static_cast<T>(maxval);
if (output_min == output_max) { if (output_min == output_max) {
auto input_x = framework::EigenVector<T>::Flatten(*input); auto input_x = pten::EigenVector<T>::Flatten(input);
framework::Tensor input_min_t, input_max_t; DenseTensor input_min_t, input_max_t;
auto* input_min_data = auto* input_min_data =
input_min_t.mutable_data<T>({1}, context.GetPlace()); input_min_t.mutable_data<T>({1}, dev_ctx.GetPlace());
auto* input_max_data = auto* input_max_data =
input_max_t.mutable_data<T>({1}, context.GetPlace()); input_max_t.mutable_data<T>({1}, dev_ctx.GetPlace());
auto input_min_scala = framework::EigenScalar<T>::From(input_min_t); auto input_min_scala = pten::EigenScalar<T>::From(input_min_t);
auto input_max_scala = framework::EigenScalar<T>::From(input_max_t); auto input_max_scala = pten::EigenScalar<T>::From(input_max_t);
auto* place = auto* place =
context.template device_context<DeviceContext>().eigen_device(); dev_ctx.eigen_device();
input_min_scala.device(*place) = input_x.minimum(); input_min_scala.device(*place) = input_x.minimum();
input_max_scala.device(*place) = input_x.maximum(); input_max_scala.device(*place) = input_x.maximum();
Tensor input_min_cpu, input_max_cpu; DenseTensor input_min_cpu, input_max_cpu;
paddle::framework::TensorCopySync(input_min_t, platform::CPUPlace(), paddle::framework::TensorCopySync(input_min_t, paddle::platform::CPUPlace(),
&input_min_cpu); &input_min_cpu);
paddle::framework::TensorCopySync(input_max_t, platform::CPUPlace(), paddle::framework::TensorCopySync(input_max_t, paddle::platform::CPUPlace(),
&input_max_cpu); &input_max_cpu);
output_min = input_min_cpu.data<T>()[0]; output_min = input_min_cpu.data<T>()[0];
...@@ -126,31 +116,31 @@ class HistogramCUDAKernel : public framework::OpKernel<T> { ...@@ -126,31 +116,31 @@ class HistogramCUDAKernel : public framework::OpKernel<T> {
std::isnan(static_cast<float>(output_max)) || std::isnan(static_cast<float>(output_max)) ||
std::isinf(static_cast<float>(output_min)) || std::isinf(static_cast<float>(output_min)) ||
std::isnan(static_cast<float>(output_max))), std::isnan(static_cast<float>(output_max))),
false, platform::errors::OutOfRange("range of min, max is not finite")); false, pten::errors::OutOfRange("range of min, max is not finite"));
PADDLE_ENFORCE_GE( PADDLE_ENFORCE_GE(
output_max, output_min, output_max, output_min,
platform::errors::InvalidArgument( pten::errors::InvalidArgument(
"max must be larger or equal to min. If min and max are both zero, " "max must be larger or equal to min. If min and max are both zero, "
"the minimum and maximum values of the data are used. " "the minimum and maximum values of the data are used. "
"But received max is %d, min is %d", "But received max is %d, min is %d",
maxval, minval)); maxval, minval));
auto stream = auto stream =
context.template device_context<platform::CUDADeviceContext>().stream(); dev_ctx.stream();
KernelHistogram< KernelHistogram<
T, IndexType><<<GET_BLOCKS(input_numel), PADDLE_CUDA_NUM_THREADS, T, IndexType><<<GET_BLOCKS(input_numel), PADDLE_CUDA_NUM_THREADS,
nbins * sizeof(int64_t), stream>>>( nbins * sizeof(int64_t), stream>>>(
input_data, input_numel, nbins, output_min, output_max, out_data); input_data, input_numel, nbins, output_min, output_max, out_data);
} }
};
} //namespace pten
} // namespace operators
} // namespace paddle
PT_REGISTER_KERNEL(histogram,
namespace ops = paddle::operators; GPU,
REGISTER_OP_CUDA_KERNEL( ALL_LAYOUT,
histogram, pten::HistogramKernel,
ops::HistogramCUDAKernel<paddle::platform::CUDADeviceContext, int>, float,
ops::HistogramCUDAKernel<paddle::platform::CUDADeviceContext, int64_t>, double,
ops::HistogramCUDAKernel<paddle::platform::CUDADeviceContext, float>, int,
ops::HistogramCUDAKernel<paddle::platform::CUDADeviceContext, double>); int64_t) {}
#pragma once
#include "paddle/pten/core/dense_tensor.h"
namespace pten {
template <typename T, typename Context>
void HistogramSelectKernel(const Context& dev_ctx,
const DenseTensor& input,
int64_t bins,
int min,
int max,
DenseTensor* out);
} // namspace pten
#include "paddle/pten/core/compat/op_utils.h"
namespace pten {
KernelSignature HistogramOpArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature(
"histogram", {"X"}, {"bins", "min", "max"}, {"Out"});
}
} // namespace pten
PT_REGISTER_ARG_MAPPING_FN(histogram, pten::HistogramOpArgumentMapping);
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册