Commit 798a4eac (unverified)
Authored Jul 28, 2022 by Xiaoxu Chen; committed by GitHub on Jul 28, 2022
Parent: 2781740b

migrate dirichlet kernel to phi (#44434)

* migrate dirichlet op kernel to phi
* fix dirichlet sample memory leak
Showing 9 changed files with 222 additions and 155 deletions:

  paddle/fluid/operators/dirichlet_op.cc             +11   -93
  paddle/phi/api/yaml/legacy_api.yaml                 +9    -0
  paddle/phi/infermeta/unary.cc                      +13    -0
  paddle/phi/infermeta/unary.h                        +2    -1
  paddle/phi/kernels/cpu/dirichlet_kernel.cc        +102    -0
  paddle/phi/kernels/dirichlet_kernel.h              +25    -0
  paddle/phi/kernels/gpu/dirichlet_kernel.cu         +37   -37
  paddle/phi/kernels/impl/dirichlet_kernel_impl.h    +19   -21
  python/paddle/distribution/dirichlet.py             +4    -3
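Both the CPU and GPU samplers in this change construct a Dirichlet draw the same way the removed fluid kernels did: draw one gamma variate per element of alpha, then normalize along the last axis. For reference, the identity behind the in-code comments ("sample from K gamma distributions", "normalize them into a simplex") is:

$$X_i \sim \mathrm{Gamma}(\alpha_i, 1)\ \text{independently},\qquad \left(\frac{X_1}{\sum_k X_k},\ \dots,\ \frac{X_K}{\sum_k X_k}\right) \sim \mathrm{Dirichlet}(\alpha_1,\dots,\alpha_K).$$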
paddle/fluid/operators/dirichlet_op.cc

@@ -11,83 +11,14 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/operators/dirichlet_op.h"
-#include "paddle/fluid/framework/generator.h"
-#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
-#include "paddle/fluid/operators/reduce_ops/reduce_op.h"
-#include "paddle/fluid/operators/reduce_ops/reduce_sum_op.h"
+#include "paddle/fluid/framework/infershape_utils.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/phi/core/infermeta_utils.h"
+#include "paddle/phi/infermeta/unary.h"
 
 namespace paddle {
 namespace operators {
 
-template <typename T, typename UniformSamplerT, typename NormalSamplerT>
-struct GammaCPUFunctor {
-  GammaCPUFunctor(const T* alpha,
-                  T* gamma,
-                  BaseSampler<T, UniformSamplerT> uniform,
-                  BaseSampler<T, NormalSamplerT> normal)
-      : alpha_(alpha), gamma_(gamma), uniform_(uniform), normal_(normal) {}
-
-  HOST void operator()(int64_t index) {
-    auto sample = sample_gamma<T, T, UniformSamplerT, NormalSamplerT>(
-        alpha_[index], uniform_, normal_);
-    gamma_[index] = std::max(std::numeric_limits<T>::min(), sample);
-  }
-
-  const T* alpha_;
-  T* gamma_;
-  BaseSampler<T, UniformSamplerT> uniform_;
-  BaseSampler<T, NormalSamplerT> normal_;
-};
-
-template <typename T>
-struct DirichletSampler<phi::CPUContext, T> {
-  void operator()(const framework::ExecutionContext& ctx,
-                  const Tensor* alpha,
-                  Tensor* out) {
-    auto& dev_ctx = ctx.device_context<phi::CPUContext>();
-    auto p_gen = framework::DefaultCPUGenerator();
-    auto generator = p_gen->GetCPUEngine();
-
-    auto uniform = [&generator]() -> T {
-      std::uniform_real_distribution<T> u(0.0, 1.0);
-      return u(*generator);
-    };
-    BaseSampler<T, decltype(uniform)> standard_uniform(uniform);
-
-    auto normal = [&generator]() {
-      std::normal_distribution<T> n(0.0, 1.0);
-      return n(*generator);
-    };
-    BaseSampler<T, decltype(normal)> standard_normal(normal);
-
-    // sample from K gamma distributions, where K=alpha.numel()
-    framework::Tensor gamma_samples;
-    gamma_samples.mutable_data<T>(alpha->dims(), dev_ctx.GetPlace());
-    GammaCPUFunctor<T, decltype(uniform), decltype(normal)> gamma_functor(
-        alpha->data<T>(),
-        gamma_samples.data<T>(),
-        standard_uniform,
-        standard_normal);
-    platform::ForRange<phi::CPUContext> for_range(dev_ctx, alpha->numel());
-    for_range(gamma_functor);
-
-    // normalize them into a simplex, along the last axis
-    framework::Tensor gamma_sum;
-    auto new_shape = gamma_samples.dims();
-    new_shape[new_shape.size() - 1] = 1;
-    gamma_sum.mutable_data<T>(new_shape, dev_ctx.GetPlace());
-    ReduceKernelFunctor<phi::CPUContext, T, SumFunctor>(
-        &gamma_samples, &gamma_sum, {new_shape.size() - 1}, true, false, ctx)
-        .template apply<T>();
-    ElementwiseComputeEx<DivFunctor<T>, phi::CPUContext, T, T>(
-        ctx, &gamma_samples, &gamma_sum, -1, DivFunctor<T>(), out);
-  }
-};
-
 class DirichletOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {

@@ -100,29 +31,16 @@ class DirichletOpMaker : public framework::OpProtoAndCheckerMaker {
 class DirichletOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
-
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    OP_INOUT_CHECK(ctx->HasInput("Alpha"), "Input", "Alpha", "dirichlet");
-    OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "dirichlet");
-    const auto alpha_dim = ctx->GetInputDim("Alpha");
-    PADDLE_ENFORCE_GE(alpha_dim.size(),
-                      1,
-                      platform::errors::InvalidArgument(
-                          "ShapeError: The number of dimensions of 'Alpha' "
-                          "must be greater than or euqal to 1. "
-                          "But received Alpha's dimensions = %d,",
-                          alpha_dim.size()));
-    ctx->ShareDim("Alpha", /*->*/ "Out");
-  }
 };
 
 }  // namespace operators
 }  // namespace paddle
 
+DECLARE_INFER_SHAPE_FUNCTOR(dirichlet,
+                            DirichletInferShapeFunctor,
+                            PD_INFER_META(phi::DirichletInferMeta));
 REGISTER_OP_WITHOUT_GRADIENT(dirichlet,
                              paddle::operators::DirichletOp,
-                             paddle::operators::DirichletOpMaker);
-REGISTER_OP_CPU_KERNEL(
-    dirichlet,
-    paddle::operators::DirichletKernel<phi::CPUContext, float>,
-    paddle::operators::DirichletKernel<phi::CPUContext, double>);
+                             paddle::operators::DirichletOpMaker,
+                             DirichletInferShapeFunctor);
paddle/phi/api/yaml/legacy_api.yaml

@@ -2531,6 +2531,15 @@
   kernel :
     func : broadcast_tensors
   backward : broadcast_tensors_grad
 
+# dirichlet
+- api : dirichlet
+  args : (Tensor alpha)
+  output : Tensor
+  infer_meta :
+    func : DirichletInferMeta
+  kernel :
+    func : dirichlet
+
 # eig
 - api : eig
paddle/phi/infermeta/unary.cc

@@ -518,6 +518,19 @@ void DiagonalInferMeta(const MetaTensor& input,
   out->set_dims(phi::make_ddim(out_dims));
 }
 
+void DirichletInferMeta(const MetaTensor& alpha, MetaTensor* out) {
+  const auto alpha_dim = alpha.dims();
+  PADDLE_ENFORCE_GE(alpha_dim.size(),
+                    1,
+                    phi::errors::InvalidArgument(
+                        "ShapeError: The number of dimensions of 'Alpha' "
+                        "must be greater than or euqal to 1. "
+                        "But received Alpha's dimensions = %d,",
+                        alpha_dim.size()));
+  out->set_dims(alpha_dim);
+  out->set_dtype(alpha.dtype());
+}
+
 void EigInferMeta(const MetaTensor& x, MetaTensor* out_w, MetaTensor* out_v) {
   auto x_dims = x.dims();
   int rank = x_dims.size();
paddle/phi/infermeta/unary.h

@@ -90,6 +90,8 @@ void DiagInferMeta(const MetaTensor& x,
 void DiagonalInferMeta(
     const MetaTensor& input, int offset, int axis1, int axis2, MetaTensor* out);
 
+void DirichletInferMeta(const MetaTensor& alpha, MetaTensor* out);
+
 void EigInferMeta(const MetaTensor& x, MetaTensor* out_w, MetaTensor* out_v);
 
 void EighInferMeta(const MetaTensor& x,

@@ -534,5 +536,4 @@ void ChannelShuffleInferMeta(const MetaTensor& x,
                              MetaTensor* out);
 
 void IdentityLossInferMeta(const MetaTensor& x, int reduction, MetaTensor* out);
 
 }  // namespace phi
paddle/phi/kernels/cpu/dirichlet_kernel.cc  (new file, mode 100644)

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/cpu/elementwise.h"
#include "paddle/phi/kernels/cpu/reduce.h"
#include "paddle/phi/kernels/funcs/elementwise_functor.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/reduce_functor.h"
#include "paddle/phi/kernels/impl/dirichlet_kernel_impl.h"

namespace phi {

template <typename T, typename UniformSamplerT, typename NormalSamplerT>
struct GammaCPUFunctor {
  GammaCPUFunctor(const T* alpha,
                  T* gamma,
                  BaseSampler<T, UniformSamplerT> uniform,
                  BaseSampler<T, NormalSamplerT> normal)
      : alpha_(alpha), gamma_(gamma), uniform_(uniform), normal_(normal) {}

  HOST void operator()(int64_t index) {
    auto sample = sample_gamma<T, T, UniformSamplerT, NormalSamplerT>(
        alpha_[index], uniform_, normal_);
    gamma_[index] = std::max(std::numeric_limits<T>::min(), sample);
  }

  const T* alpha_;
  T* gamma_;
  BaseSampler<T, UniformSamplerT> uniform_;
  BaseSampler<T, NormalSamplerT> normal_;
};

template <typename T>
struct DirichletSampler<CPUContext, T> {
  void operator()(const CPUContext& dev_ctx,
                  const DenseTensor& alpha,
                  DenseTensor* out) {
    auto generator = dev_ctx.GetGenerator()->GetCPUEngine();

    auto uniform = [&generator]() -> T {
      std::uniform_real_distribution<T> u(0.0, 1.0);
      return u(*generator);
    };
    BaseSampler<T, decltype(uniform)> standard_uniform(uniform);

    auto normal = [&generator]() {
      std::normal_distribution<T> n(0.0, 1.0);
      return n(*generator);
    };
    BaseSampler<T, decltype(normal)> standard_normal(normal);

    // sample from K gamma distributions, where K=alpha.numel()
    DenseTensor gamma_samples;
    gamma_samples.Resize(alpha.dims());
    dev_ctx.template Alloc<T>(&gamma_samples);
    GammaCPUFunctor<T, decltype(uniform), decltype(normal)> gamma_functor(
        alpha.data<T>(),
        gamma_samples.data<T>(),
        standard_uniform,
        standard_normal);
    funcs::ForRange<CPUContext> for_range(dev_ctx, alpha.numel());
    for_range(gamma_functor);

    // normalize them into a simplex, along the last axis
    DenseTensor gamma_sum;
    auto new_shape = gamma_samples.dims();
    new_shape[new_shape.size() - 1] = 1;
    gamma_sum.Resize(new_shape);
    dev_ctx.template Alloc<T>(&gamma_sum);
    ReduceKernelImpl<CPUContext, T, T, funcs::SumFunctor>(
        dev_ctx, gamma_samples, &gamma_sum, {new_shape.size() - 1}, true, false);
    funcs::ElementwiseCompute<funcs::DivideFunctor<T>, T, T>(
        dev_ctx, gamma_samples, gamma_sum, -1, funcs::DivideFunctor<T>(), out);
  }
};

}  // namespace phi

PD_REGISTER_KERNEL(
    dirichlet, CPU, ALL_LAYOUT, phi::Dirichletkernel, float, double) {}
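Aside: the control flow of DirichletSampler<CPUContext, T> above is easier to see outside the framework. The following is a minimal standalone sketch of the same draw-then-normalize scheme — illustrative only: std::gamma_distribution stands in for the kernel's hand-rolled sample_gamma, and SampleDirichlet is a hypothetical helper, not Paddle code.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <limits>
#include <random>
#include <vector>

// Draw one Dirichlet(alpha) sample: one gamma draw per component,
// clamped away from zero, then normalized onto the simplex.
std::vector<double> SampleDirichlet(const std::vector<double>& alpha,
                                    std::mt19937_64& engine) {
  std::vector<double> gamma(alpha.size());
  double sum = 0.0;
  for (std::size_t i = 0; i < alpha.size(); ++i) {
    std::gamma_distribution<double> g(alpha[i], 1.0);
    // Mirrors the kernel's std::max(std::numeric_limits<T>::min(), sample),
    // which keeps exact zeros out of the normalization step.
    gamma[i] = std::max(std::numeric_limits<double>::min(), g(engine));
    sum += gamma[i];
  }
  for (double& v : gamma) v /= sum;  // normalize along the (only) axis
  return gamma;
}

int main() {
  std::mt19937_64 engine(42);
  for (double v : SampleDirichlet({1.0, 2.0, 3.0}, engine)) {
    std::cout << v << ' ';  // positive components summing to 1
  }
  std::cout << '\n';
}

The kernel does the same thing in batch: alpha.numel() gamma draws at once via ForRange, then a sum-reduce over the last axis and a broadcast divide.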
paddle/phi/kernels/dirichlet_kernel.h  (new file, mode 100644)

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/phi/core/dense_tensor.h"

namespace phi {

template <typename T, typename Context>
void Dirichletkernel(const Context& dev_ctx,
                     const DenseTensor& alpha,
                     DenseTensor* out);

}  // namespace phi
paddle/fluid/operators/dirichlet_op.cu → paddle/phi/kernels/gpu/dirichlet_kernel.cu

@@ -12,12 +14,14 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/operators/dirichlet_op.h"
-#include "paddle/fluid/framework/generator.h"
-#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
-#include "paddle/fluid/operators/reduce_ops/reduce_op.h"
-#include "paddle/fluid/operators/reduce_ops/reduce_sum_op.h"
-#include "paddle/fluid/platform/for_range.h"
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/cpu/reduce.h"
+#include "paddle/phi/kernels/funcs/broadcast_function.h"
+#include "paddle/phi/kernels/funcs/elementwise_functor.h"
+#include "paddle/phi/kernels/funcs/for_range.h"
+#include "paddle/phi/kernels/funcs/reduce_functor.h"
+#include "paddle/phi/kernels/impl/dirichlet_kernel_impl.h"
 
 #ifdef PADDLE_WITH_CUDA
 #include <curand_kernel.h>

@@ -38,8 +42,7 @@ using COMPAT_RANDSTATEPHILOX4_32_10_T = hiprandStatePhilox4_32_10_t;
 #define COMPAT_RAND_NORMAL hiprand_normal
 #endif
 
-namespace paddle {
-namespace operators {
+namespace phi {
 
 template <typename T>
 struct GammaCUDAFunctor {
   GammaCUDAFunctor(const T* alpha, T* gamma, uint64_t seed, uint64_t offset)

@@ -70,47 +73,44 @@ struct GammaCUDAFunctor {
 };
 
 template <typename T>
-struct DirichletSampler<platform::CUDADeviceContext, T> {
-  void operator()(const framework::ExecutionContext& ctx,
-                  const framework::Tensor* alpha,
-                  framework::Tensor* out) {
-    auto& dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
-
-    // init state, seed & offset for all threads
-    int device_id = ctx.GetPlace().GetDeviceId();
-    auto p_gen = framework::DefaultCUDAGenerator(device_id);
+struct DirichletSampler<GPUContext, T> {
+  void operator()(const GPUContext& dev_ctx,
+                  const DenseTensor& alpha,
+                  DenseTensor* out) {
+    auto p_gen = dev_ctx.GetGenerator();
     auto seed_and_offset = p_gen->IncrementOffset(10);  // hard-coded offset
     auto seed = seed_and_offset.first;
    auto offset = seed_and_offset.second;
 
     // sample from K gamma distributions, where K=alpha.numel()
-    framework::Tensor gamma_samples;
-    gamma_samples.mutable_data<T>(alpha->dims(), dev_ctx.GetPlace());
+    DenseTensor gamma_samples;
+    gamma_samples.Resize(alpha.dims());
+    dev_ctx.template Alloc<T>(&gamma_samples);
     GammaCUDAFunctor<T> gamma_functor(
-        alpha->data<T>(), gamma_samples.data<T>(), seed, offset);
-    platform::ForRange<platform::CUDADeviceContext> for_range(dev_ctx,
-                                                              out->numel());
+        alpha.data<T>(), gamma_samples.data<T>(), seed, offset);
+    funcs::ForRange<GPUContext> for_range(dev_ctx, out->numel());
     for_range(gamma_functor);
 
     // normalize them into a simplex, along the last axis
-    framework::Tensor gamma_sum;
+    DenseTensor gamma_sum;
     auto new_shape = gamma_samples.dims();
     new_shape[new_shape.size() - 1] = 1;
-    gamma_sum.mutable_data<T>(new_shape, dev_ctx.GetPlace());
-    ReduceKernelFunctor<platform::CUDADeviceContext, T, SumFunctor>(
-        &gamma_samples, &gamma_sum, {new_shape.size() - 1}, true, false, ctx)
-        .template apply<T>();
-    ElementwiseComputeEx<DivFunctor<T>, platform::CUDADeviceContext, T, T>(
-        ctx, &gamma_samples, &gamma_sum, -1, DivFunctor<T>(), out);
+    gamma_sum.Resize(new_shape);
+    dev_ctx.template Alloc<T>(&gamma_sum);
+    ReduceKernelImpl<GPUContext, T, T, funcs::SumFunctor>(
+        dev_ctx, gamma_samples, &gamma_sum, {new_shape.size() - 1}, true, false);
+    funcs::ElementwiseCompute<funcs::DivideFunctor<T>, T, T>(
+        dev_ctx, gamma_samples, gamma_sum, -1, funcs::DivideFunctor<T>(), out);
   }
 };
-}  // namespace operators
-}  // namespace paddle
 
-namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(
-    dirichlet,
-    ops::DirichletKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::DirichletKernel<paddle::platform::CUDADeviceContext, double>);
+}  // namespace phi
+
+PD_REGISTER_KERNEL(
+    dirichlet, GPU, ALL_LAYOUT, phi::Dirichletkernel, float, double) {}
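The GPU path keeps no generator state on the device: each launch receives a (seed, offset) pair, and IncrementOffset(10) advances a host-side counter so that successive launches read disjoint windows of the Philox counter space (10 being the hard-coded worst-case number of draws per element, per the comment above). A minimal model of that bookkeeping — assuming only the semantics visible in this diff; CounterBasedGenerator is a hypothetical stand-in, not Paddle's generator:

#include <cstdint>
#include <iostream>
#include <utility>

struct CounterBasedGenerator {
  uint64_t seed;
  uint64_t offset;

  // Reserve `increment` counter steps: the upcoming kernel launch
  // starts at the returned offset, the next launch starts past it,
  // so no random number is ever consumed twice.
  std::pair<uint64_t, uint64_t> IncrementOffset(uint64_t increment) {
    uint64_t start = offset;
    offset += increment;
    return {seed, start};
  }
};

int main() {
  CounterBasedGenerator gen{1234, 0};
  auto first = gen.IncrementOffset(10);   // (1234, 0)
  auto second = gen.IncrementOffset(10);  // (1234, 10)
  std::cout << first.second << ' ' << second.second << '\n';
}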
paddle/fluid/operators/dirichlet_op.h → paddle/phi/kernels/impl/dirichlet_kernel_impl.h

-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.

@@ -13,11 +13,10 @@
 // limitations under the License.
 
 #pragma once
 
 #include <cmath>
 #include <random>
 
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/platform/for_range.h"
+#include "paddle/phi/kernels/dirichlet_kernel.h"
 
 // ROCM hcc doesn't work well with using std:: in kernel functions
 #if defined(PADDLE_WITH_CUDA)

@@ -42,10 +41,7 @@
 #define COMPAT_LOG1P std::log1p
 #endif
 
-namespace paddle {
-namespace operators {
-
-template <typename DeviceContext, typename T>
-struct DirichletSampler;
+namespace phi {
 
 template <typename ScalarT, typename SamplerT>
 struct BaseSampler {

@@ -117,17 +113,19 @@ sample_gamma(ScalarT alpha,
   }
 }
 
-template <typename DeviceContext, typename T>
-class DirichletKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    const auto* alpha = ctx.Input<framework::Tensor>("Alpha");
-    auto* out = ctx.Output<framework::Tensor>("Out");
-    out->mutable_data<T>(ctx.GetPlace());
-
-    DirichletSampler<DeviceContext, T> sampler;
-    sampler(ctx, alpha, out);
-  }
-};
-}  // namespace operators
-}  // namespace paddle
+template <typename Context, typename T>
+struct DirichletSampler {
+  void operator()(const Context& dev_ctx,
+                  const DenseTensor& alpha,
+                  DenseTensor* out);
+};
+
+template <typename T, typename Context>
+void Dirichletkernel(const Context& dev_ctx,
+                     const DenseTensor& alpha,
+                     DenseTensor* out) {
+  dev_ctx.template Alloc<T>(out);
+  DirichletSampler<Context, T> sampler;
+  sampler(dev_ctx, alpha, out);
+}
+}  // namespace phi
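The rewritten impl header follows a common phi layout: a generic Dirichletkernel entry point plus a DirichletSampler primary template that each backend translation unit specializes (CPUContext in cpu/dirichlet_kernel.cc, GPUContext in gpu/dirichlet_kernel.cu). A self-contained sketch of that idiom, with hypothetical CpuCtx/GpuCtx tags standing in for the real device contexts:

#include <iostream>

struct CpuCtx {};
struct GpuCtx {};

// Primary template: declared in the shared header, never defined.
template <typename Context, typename T>
struct Sampler;

// Each backend file supplies its own specialization.
template <typename T>
struct Sampler<CpuCtx, T> {
  void operator()(const CpuCtx&) { std::cout << "cpu sample\n"; }
};

template <typename T>
struct Sampler<GpuCtx, T> {
  void operator()(const GpuCtx&) { std::cout << "gpu sample\n"; }
};

// Shared entry point, analogous to phi::Dirichletkernel: the context
// type selects the backend at compile time.
template <typename T, typename Context>
void Kernel(const Context& ctx) {
  Sampler<Context, T> sampler;
  sampler(ctx);
}

int main() {
  Kernel<float>(CpuCtx{});  // prints "cpu sample"
  Kernel<float>(GpuCtx{});  // prints "gpu sample"
}

Linking fails, rather than silently mis-dispatching, if a backend forgets its specialization — which is why the primary template is left undefined.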
python/paddle/distribution/dirichlet.py

@@ -15,7 +15,7 @@
 import paddle
 from paddle.distribution import exponential_family
 from paddle.fluid.data_feeder import check_variable_and_dtype
-from paddle.fluid.framework import _non_static_mode, in_dygraph_mode
+from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph
 from paddle.fluid.layer_helper import LayerHelper

@@ -157,9 +157,10 @@ def _dirichlet(concentration, name=None):
     check_variable_and_dtype(concentration, 'concentration',
                              ['float32', 'float64'], op_type)
 
-    if _non_static_mode():
+    if in_dygraph_mode():
+        return paddle._C_ops.final_state_dirichlet(concentration)
+    elif _in_legacy_dygraph():
         return paddle._C_ops.dirichlet(concentration)
     else:
         helper = LayerHelper(op_type, **locals())
         out = helper.create_variable_for_type_inference(