Unverified  Commit 798a4eac authored by Xiaoxu Chen, committed by GitHub

migrate dirichlet kernel to phi (#44434)

* migrate dirichlet op kernel to phi

* fix dirichlet sample memory leak
Parent 2781740b
@@ -11,83 +11,14 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "paddle/fluid/operators/dirichlet_op.h"
-#include "paddle/fluid/framework/generator.h"
-#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
-#include "paddle/fluid/operators/reduce_ops/reduce_op.h"
-#include "paddle/fluid/operators/reduce_ops/reduce_sum_op.h"
+#include "paddle/fluid/framework/infershape_utils.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/phi/core/infermeta_utils.h"
+#include "paddle/phi/infermeta/unary.h"
namespace paddle {
namespace operators {
-template <typename T, typename UniformSamplerT, typename NormalSamplerT>
-struct GammaCPUFunctor {
-  GammaCPUFunctor(const T* alpha,
-                  T* gamma,
-                  BaseSampler<T, UniformSamplerT> uniform,
-                  BaseSampler<T, NormalSamplerT> normal)
-      : alpha_(alpha), gamma_(gamma), uniform_(uniform), normal_(normal) {}
-  HOST void operator()(int64_t index) {
-    auto sample = sample_gamma<T, T, UniformSamplerT, NormalSamplerT>(
-        alpha_[index], uniform_, normal_);
-    gamma_[index] = std::max(std::numeric_limits<T>::min(), sample);
-  }
-  const T* alpha_;
-  T* gamma_;
-  BaseSampler<T, UniformSamplerT> uniform_;
-  BaseSampler<T, NormalSamplerT> normal_;
-};
-template <typename T>
-struct DirichletSampler<phi::CPUContext, T> {
-  void operator()(const framework::ExecutionContext& ctx,
-                  const Tensor* alpha,
-                  Tensor* out) {
-    auto& dev_ctx = ctx.device_context<phi::CPUContext>();
-    auto p_gen = framework::DefaultCPUGenerator();
-    auto generator = p_gen->GetCPUEngine();
-    auto uniform = [&generator]() -> T {
-      std::uniform_real_distribution<T> u(0.0, 1.0);
-      return u(*generator);
-    };
-    BaseSampler<T, decltype(uniform)> standard_uniform(uniform);
-    auto normal = [&generator]() {
-      std::normal_distribution<T> n(0.0, 1.0);
-      return n(*generator);
-    };
-    BaseSampler<T, decltype(normal)> standard_normal(normal);
-    // sample from K gamma distributions, where K=alpha.numel()
-    framework::Tensor gamma_samples;
-    gamma_samples.mutable_data<T>(alpha->dims(), dev_ctx.GetPlace());
-    GammaCPUFunctor<T, decltype(uniform), decltype(normal)> gamma_functor(
-        alpha->data<T>(),
-        gamma_samples.data<T>(),
-        standard_uniform,
-        standard_normal);
-    platform::ForRange<phi::CPUContext> for_range(dev_ctx, alpha->numel());
-    for_range(gamma_functor);
-    // normalize them into a simplex, along the last axis
-    framework::Tensor gamma_sum;
-    auto new_shape = gamma_samples.dims();
-    new_shape[new_shape.size() - 1] = 1;
-    gamma_sum.mutable_data<T>(new_shape, dev_ctx.GetPlace());
-    ReduceKernelFunctor<phi::CPUContext, T, SumFunctor>(
-        &gamma_samples, &gamma_sum, {new_shape.size() - 1}, true, false, ctx)
-        .template apply<T>();
-    ElementwiseComputeEx<DivFunctor<T>, phi::CPUContext, T, T>(
-        ctx, &gamma_samples, &gamma_sum, -1, DivFunctor<T>(), out);
-  }
-};
class DirichletOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
@@ -100,29 +31,16 @@ class DirichletOpMaker : public framework::OpProtoAndCheckerMaker {
class DirichletOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    OP_INOUT_CHECK(ctx->HasInput("Alpha"), "Input", "Alpha", "dirichlet");
-    OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "dirichlet");
-    const auto alpha_dim = ctx->GetInputDim("Alpha");
-    PADDLE_ENFORCE_GE(alpha_dim.size(),
-                      1,
-                      platform::errors::InvalidArgument(
-                          "ShapeError: The number of dimensions of 'Alpha' "
-                          "must be greater than or euqal to 1. "
-                          "But received Alpha's dimensions = %d,",
-                          alpha_dim.size()));
-    ctx->ShareDim("Alpha", /*->*/ "Out");
-  }
};
}  // namespace operators
}  // namespace paddle
+DECLARE_INFER_SHAPE_FUNCTOR(dirichlet,
+                            DirichletInferShapeFunctor,
+                            PD_INFER_META(phi::DirichletInferMeta));
REGISTER_OP_WITHOUT_GRADIENT(dirichlet,
                             paddle::operators::DirichletOp,
-                            paddle::operators::DirichletOpMaker);
-REGISTER_OP_CPU_KERNEL(
-    dirichlet,
-    paddle::operators::DirichletKernel<phi::CPUContext, float>,
-    paddle::operators::DirichletKernel<phi::CPUContext, double>);
+                            paddle::operators::DirichletOpMaker,
+                            DirichletInferShapeFunctor);
@@ -2531,6 +2531,15 @@
  kernel:
    func: broadcast_tensors
  backward: broadcast_tensors_grad

# dirichlet
- api: dirichlet
  args: (Tensor alpha)
  output: Tensor
  infer_meta:
    func: DirichletInferMeta
  kernel:
    func: dirichlet

# eig
- api: eig
......
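As a side note, the yaml entry above is what exposes the new phi kernel through the code-generated final-state API. A rough dygraph-mode sketch of calling it directly is below; whether `paddle._C_ops.final_state_dirichlet` can be invoked this way depends on the eager-mode build and flags, so treat the snippet (and the tensor values in it) as illustrative only.

```python
# Illustrative only: the generated final-state API produced by the yaml entry
# above; the distribution code later in this diff calls it in dygraph mode.
import paddle

alpha = paddle.to_tensor([[1.0, 2.0, 3.0]])
out = paddle._C_ops.final_state_dirichlet(alpha)  # same shape as alpha
print(out)  # each row lies on the probability simplex
```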
@@ -518,6 +518,19 @@ void DiagonalInferMeta(const MetaTensor& input,
  out->set_dims(phi::make_ddim(out_dims));
}
void DirichletInferMeta(const MetaTensor& alpha, MetaTensor* out) {
  const auto alpha_dim = alpha.dims();
  PADDLE_ENFORCE_GE(alpha_dim.size(),
                    1,
                    phi::errors::InvalidArgument(
                        "ShapeError: The number of dimensions of 'Alpha' "
                        "must be greater than or equal to 1. "
                        "But received Alpha's dimensions = %d.",
                        alpha_dim.size()));
  out->set_dims(alpha_dim);
  out->set_dtype(alpha.dtype());
}
void EigInferMeta(const MetaTensor& x, MetaTensor* out_w, MetaTensor* out_v) {
  auto x_dims = x.dims();
  int rank = x_dims.size();
......
@@ -90,6 +90,8 @@ void DiagInferMeta(const MetaTensor& x,
void DiagonalInferMeta(
    const MetaTensor& input, int offset, int axis1, int axis2, MetaTensor* out);
void DirichletInferMeta(const MetaTensor& alpha, MetaTensor* out);
void EigInferMeta(const MetaTensor& x, MetaTensor* out_w, MetaTensor* out_v);

void EighInferMeta(const MetaTensor& x,
@@ -534,5 +536,4 @@ void ChannelShuffleInferMeta(const MetaTensor& x,
                             MetaTensor* out);

void IdentityLossInferMeta(const MetaTensor& x, int reduction, MetaTensor* out);

}  // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/cpu/elementwise.h"
#include "paddle/phi/kernels/cpu/reduce.h"
#include "paddle/phi/kernels/funcs/elementwise_functor.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/reduce_functor.h"
#include "paddle/phi/kernels/impl/dirichlet_kernel_impl.h"
namespace phi {
template <typename T, typename UniformSamplerT, typename NormalSamplerT>
struct GammaCPUFunctor {
GammaCPUFunctor(const T* alpha,
T* gamma,
BaseSampler<T, UniformSamplerT> uniform,
BaseSampler<T, NormalSamplerT> normal)
: alpha_(alpha), gamma_(gamma), uniform_(uniform), normal_(normal) {}
HOST void operator()(int64_t index) {
auto sample = sample_gamma<T, T, UniformSamplerT, NormalSamplerT>(
alpha_[index], uniform_, normal_);
gamma_[index] = std::max(std::numeric_limits<T>::min(), sample);
}
const T* alpha_;
T* gamma_;
BaseSampler<T, UniformSamplerT> uniform_;
BaseSampler<T, NormalSamplerT> normal_;
};
template <typename T>
struct DirichletSampler<CPUContext, T> {
void operator()(const CPUContext& dev_ctx,
const DenseTensor& alpha,
DenseTensor* out) {
auto generator = dev_ctx.GetGenerator()->GetCPUEngine();
auto uniform = [&generator]() -> T {
std::uniform_real_distribution<T> u(0.0, 1.0);
return u(*generator);
};
BaseSampler<T, decltype(uniform)> standard_uniform(uniform);
auto normal = [&generator]() {
std::normal_distribution<T> n(0.0, 1.0);
return n(*generator);
};
BaseSampler<T, decltype(normal)> standard_normal(normal);
// sample from K gamma distributions, where K=alpha.numel()
DenseTensor gamma_samples;
gamma_samples.Resize(alpha.dims());
dev_ctx.template Alloc<T>(&gamma_samples);
GammaCPUFunctor<T, decltype(uniform), decltype(normal)> gamma_functor(
alpha.data<T>(),
gamma_samples.data<T>(),
standard_uniform,
standard_normal);
funcs::ForRange<CPUContext> for_range(dev_ctx, alpha.numel());
for_range(gamma_functor);
// normalize them into a simplex, along the last axis
DenseTensor gamma_sum;
auto new_shape = gamma_samples.dims();
new_shape[new_shape.size() - 1] = 1;
gamma_sum.Resize(new_shape);
dev_ctx.template Alloc<T>(&gamma_sum);
ReduceKernelImpl<CPUContext, T, T, funcs::SumFunctor>(
dev_ctx,
gamma_samples,
&gamma_sum,
{new_shape.size() - 1},
true,
false);
funcs::ElementwiseCompute<funcs::DivideFunctor<T>, T, T>(
dev_ctx, gamma_samples, gamma_sum, -1, funcs::DivideFunctor<T>(), out);
}
};
} // namespace phi
PD_REGISTER_KERNEL(
dirichlet, CPU, ALL_LAYOUT, phi::Dirichletkernel, float, double) {}
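The CPU kernel registered above builds a Dirichlet draw in the two steps its inline comments describe: sample one Gamma(alpha_i, 1) variate per element, then normalize along the last axis so each slice sums to one. A minimal NumPy sketch of the same scheme is below; it only illustrates the math and is not the code path Paddle uses.

```python
# A minimal NumPy sketch of the gamma-then-normalize construction used by the
# kernel above (illustrative only; Paddle's kernel does not call NumPy).
import numpy as np

def dirichlet_sample(alpha: np.ndarray) -> np.ndarray:
    # sample from K gamma distributions, where K = alpha.size
    gamma_samples = np.random.gamma(shape=alpha, scale=1.0)
    # clamp away exact zeros, mirroring the std::max(...) guard in GammaCPUFunctor
    gamma_samples = np.maximum(gamma_samples, np.finfo(gamma_samples.dtype).tiny)
    # normalize into a simplex along the last axis
    return gamma_samples / gamma_samples.sum(axis=-1, keepdims=True)

print(dirichlet_sample(np.array([[1.0, 2.0, 3.0]])))  # rows sum to 1
```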
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename T, typename Context>
void Dirichletkernel(const Context& dev_ctx,
const DenseTensor& alpha,
DenseTensor* out);
} // namespace phi
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
@@ -12,12 +14,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "paddle/fluid/operators/dirichlet_op.h"
-#include "paddle/fluid/framework/generator.h"
-#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
-#include "paddle/fluid/operators/reduce_ops/reduce_op.h"
-#include "paddle/fluid/operators/reduce_ops/reduce_sum_op.h"
-#include "paddle/fluid/platform/for_range.h"
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/cpu/reduce.h"
+#include "paddle/phi/kernels/funcs/broadcast_function.h"
+#include "paddle/phi/kernels/funcs/elementwise_functor.h"
+#include "paddle/phi/kernels/funcs/for_range.h"
+#include "paddle/phi/kernels/funcs/reduce_functor.h"
+#include "paddle/phi/kernels/impl/dirichlet_kernel_impl.h"
#ifdef PADDLE_WITH_CUDA
#include <curand_kernel.h>
@@ -38,8 +42,7 @@ using COMPAT_RANDSTATEPHILOX4_32_10_T = hiprandStatePhilox4_32_10_t;
#define COMPAT_RAND_NORMAL hiprand_normal
#endif
-namespace paddle {
-namespace operators {
+namespace phi {
template <typename T>
struct GammaCUDAFunctor {
  GammaCUDAFunctor(const T* alpha, T* gamma, uint64_t seed, uint64_t offset)
@@ -70,47 +73,44 @@ struct GammaCUDAFunctor {
};
template <typename T>
-struct DirichletSampler<platform::CUDADeviceContext, T> {
-  void operator()(const framework::ExecutionContext& ctx,
-                  const framework::Tensor* alpha,
-                  framework::Tensor* out) {
-    auto& dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
-    // init state, seed & offset for all threads
-    int device_id = ctx.GetPlace().GetDeviceId();
-    auto p_gen = framework::DefaultCUDAGenerator(device_id);
+struct DirichletSampler<GPUContext, T> {
+  void operator()(const GPUContext& dev_ctx,
+                  const DenseTensor& alpha,
+                  DenseTensor* out) {
+    auto p_gen = dev_ctx.GetGenerator();
    auto seed_and_offset = p_gen->IncrementOffset(10);  // hard-coded offset
    auto seed = seed_and_offset.first;
    auto offset = seed_and_offset.second;
    // sample from K gamma distributions, where K=alpha.numel()
-    framework::Tensor gamma_samples;
-    gamma_samples.mutable_data<T>(alpha->dims(), dev_ctx.GetPlace());
+    DenseTensor gamma_samples;
+    gamma_samples.Resize(alpha.dims());
+    dev_ctx.template Alloc<T>(&gamma_samples);
    GammaCUDAFunctor<T> gamma_functor(
-        alpha->data<T>(), gamma_samples.data<T>(), seed, offset);
-    platform::ForRange<platform::CUDADeviceContext> for_range(dev_ctx,
-                                                              out->numel());
+        alpha.data<T>(), gamma_samples.data<T>(), seed, offset);
+    funcs::ForRange<GPUContext> for_range(dev_ctx, out->numel());
    for_range(gamma_functor);
    // normalize them into a simplex, along the last axis
-    framework::Tensor gamma_sum;
+    DenseTensor gamma_sum;
    auto new_shape = gamma_samples.dims();
    new_shape[new_shape.size() - 1] = 1;
-    gamma_sum.mutable_data<T>(new_shape, dev_ctx.GetPlace());
-    ReduceKernelFunctor<platform::CUDADeviceContext, T, SumFunctor>(
-        &gamma_samples, &gamma_sum, {new_shape.size() - 1}, true, false, ctx)
-        .template apply<T>();
-    ElementwiseComputeEx<DivFunctor<T>, platform::CUDADeviceContext, T, T>(
-        ctx, &gamma_samples, &gamma_sum, -1, DivFunctor<T>(), out);
+    gamma_sum.Resize(new_shape);
+    dev_ctx.template Alloc<T>(&gamma_sum);
+    ReduceKernelImpl<GPUContext, T, T, funcs::SumFunctor>(
+        dev_ctx,
+        gamma_samples,
+        &gamma_sum,
+        {new_shape.size() - 1},
+        true,
+        false);
+    funcs::ElementwiseCompute<funcs::DivideFunctor<T>, T, T>(
+        dev_ctx, gamma_samples, gamma_sum, -1, funcs::DivideFunctor<T>(), out);
  }
};
-}  // namespace operators
-}  // namespace paddle
-namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(
-    dirichlet,
-    ops::DirichletKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::DirichletKernel<paddle::platform::CUDADeviceContext, double>);
+}  // namespace phi
+PD_REGISTER_KERNEL(
+    dirichlet, GPU, ALL_LAYOUT, phi::Dirichletkernel, float, double) {}
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -13,11 +13,10 @@
// limitations under the License.
#pragma once
#include <cmath>
#include <random>
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/platform/for_range.h"
+#include "paddle/phi/kernels/dirichlet_kernel.h"
// ROCM hcc doesn't work well with using std:: in kernel functions
#if defined(PADDLE_WITH_CUDA)
@@ -42,10 +41,7 @@
#define COMPAT_LOG1P std::log1p
#endif
-namespace paddle {
-namespace operators {
-template <typename DeviceContext, typename T>
-struct DirichletSampler;
+namespace phi {
template <typename ScalarT, typename SamplerT>
struct BaseSampler {
@@ -117,17 +113,19 @@ sample_gamma(ScalarT alpha,
  }
}
-template <typename DeviceContext, typename T>
-class DirichletKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    const auto* alpha = ctx.Input<framework::Tensor>("Alpha");
-    auto* out = ctx.Output<framework::Tensor>("Out");
-    out->mutable_data<T>(ctx.GetPlace());
-    DirichletSampler<DeviceContext, T> sampler;
-    sampler(ctx, alpha, out);
-  }
+template <typename Context, typename T>
+struct DirichletSampler {
+  void operator()(const Context& dev_ctx,
+                  const DenseTensor& alpha,
+                  DenseTensor* out);
};
-}  // namespace operators
-}  // namespace paddle
+template <typename T, typename Context>
+void Dirichletkernel(const Context& dev_ctx,
+                     const DenseTensor& alpha,
+                     DenseTensor* out) {
+  dev_ctx.template Alloc<T>(out);
+  DirichletSampler<Context, T> sampler;
+  sampler(dev_ctx, alpha, out);
+}
+}  // namespace phi
@@ -15,7 +15,7 @@
import paddle
from paddle.distribution import exponential_family
from paddle.fluid.data_feeder import check_variable_and_dtype
-from paddle.fluid.framework import _non_static_mode, in_dygraph_mode
+from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph
from paddle.fluid.layer_helper import LayerHelper
@@ -157,9 +157,10 @@ def _dirichlet(concentration, name=None):
    check_variable_and_dtype(concentration, 'concentration',
                             ['float32', 'float64'], op_type)
-    if _non_static_mode():
+    if in_dygraph_mode():
+        return paddle._C_ops.final_state_dirichlet(concentration)
+    elif _in_legacy_dygraph():
        return paddle._C_ops.dirichlet(concentration)
    else:
        helper = LayerHelper(op_type, **locals())
        out = helper.create_variable_for_type_inference(
......
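For context, here is a hedged usage sketch of the public distribution API that ends in `_dirichlet` above; the exact `sample` shape semantics may vary slightly between Paddle versions, so the shapes in the comments are assumptions.

```python
# Illustrative usage: paddle.distribution.Dirichlet dispatches to _dirichlet,
# and therefore to the new phi `dirichlet` kernel in dygraph mode.
import paddle

concentration = paddle.to_tensor([1.0, 2.0, 3.0])
dist = paddle.distribution.Dirichlet(concentration)
samples = dist.sample([4])  # assumed shape: [4, 3]; each row sums to 1
print(samples)
```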