Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
c66eec75
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
“1010e39bdf738029fcb78b0d388a91dfdebdda2f”上不存在“paddle/fluid/lite/api/paddle_use_passes.h”
提交
c66eec75
编写于
9月 25, 2020
作者:
P
pangyoki
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
support num_distribution different multinomial distributions
上级
8e14302f
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
150 addition
and
55 deletion
+150
-55
paddle/fluid/operators/multinomial_op.cc
paddle/fluid/operators/multinomial_op.cc
+8
-1
paddle/fluid/operators/multinomial_op.cu
paddle/fluid/operators/multinomial_op.cu
+134
-51
python/paddle/fluid/tests/unittests/test_multinomial_op.py
python/paddle/fluid/tests/unittests/test_multinomial_op.py
+8
-3
未找到文件。
paddle/fluid/operators/multinomial_op.cc
浏览文件 @
c66eec75
...
@@ -30,6 +30,7 @@ class MultinomialOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -30,6 +30,7 @@ class MultinomialOpMaker : public framework::OpProtoAndCheckerMaker {
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
"X"
,
"A tensor contains probabilities of categories"
);
AddInput
(
"X"
,
"A tensor contains probabilities of categories"
);
AddOutput
(
"Out"
,
"The output tensor of multinomial op"
);
AddOutput
(
"Out"
,
"The output tensor of multinomial op"
);
// AddOutput("yokiOut", "yoki");
AddAttr
<
int
>
(
"num_samples"
,
"number of the generated samples"
)
AddAttr
<
int
>
(
"num_samples"
,
"number of the generated samples"
)
.
SetDefault
(
1
);
.
SetDefault
(
1
);
AddAttr
<
bool
>
(
"replacement"
,
"can a category be sampled more than once"
)
AddAttr
<
bool
>
(
"replacement"
,
"can a category be sampled more than once"
)
...
@@ -49,7 +50,7 @@ class MultinomialOp : public framework::OperatorWithKernel {
...
@@ -49,7 +50,7 @@ class MultinomialOp : public framework::OperatorWithKernel {
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
OP_INOUT_CHECK
(
ctx
->
HasInput
(
"X"
),
"Input"
,
"X"
,
"Multinomial"
);
OP_INOUT_CHECK
(
ctx
->
HasInput
(
"X"
),
"Input"
,
"X"
,
"Multinomial"
);
OP_INOUT_CHECK
(
ctx
->
HasOutput
(
"Out"
),
"Output"
,
"Out"
,
"Multinomial"
);
//
OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "Multinomial");
auto
x_dim
=
ctx
->
GetInputDim
(
"X"
);
auto
x_dim
=
ctx
->
GetInputDim
(
"X"
);
int64_t
x_rank
=
x_dim
.
size
();
int64_t
x_rank
=
x_dim
.
size
();
...
@@ -62,6 +63,7 @@ class MultinomialOp : public framework::OperatorWithKernel {
...
@@ -62,6 +63,7 @@ class MultinomialOp : public framework::OperatorWithKernel {
out_dims
[
x_rank
-
1
]
=
num_samples
;
out_dims
[
x_rank
-
1
]
=
num_samples
;
ctx
->
SetOutputDim
(
"Out"
,
framework
::
make_ddim
(
out_dims
));
ctx
->
SetOutputDim
(
"Out"
,
framework
::
make_ddim
(
out_dims
));
// ctx->SetOutputDim("yokiOut", x_dim);
}
}
};
};
...
@@ -72,11 +74,16 @@ class MultinomialOpKernel<platform::CPUDeviceContext, T>
...
@@ -72,11 +74,16 @@ class MultinomialOpKernel<platform::CPUDeviceContext, T>
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
auto
x
=
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
);
const
auto
x
=
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Out"
);
auto
out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Out"
);
// auto yokiout = ctx.Output<framework::Tensor>("yokiOut");
const
int64_t
num_samples
=
ctx
.
Attr
<
int
>
(
"num_samples"
);
const
int64_t
num_samples
=
ctx
.
Attr
<
int
>
(
"num_samples"
);
const
bool
replacement
=
ctx
.
Attr
<
bool
>
(
"replacement"
);
const
bool
replacement
=
ctx
.
Attr
<
bool
>
(
"replacement"
);
auto
*
in_data
=
x
->
data
<
T
>
();
auto
*
in_data
=
x
->
data
<
T
>
();
auto
*
out_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
*
out_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
/*auto *yokiout_data = yokiout->mutable_data<T>(ctx.GetPlace());
for (int i = 0; i < x->numel(); i++) {
yokiout_data[i] = in_data[i];
}*/
auto
in_dims
=
x
->
dims
();
auto
in_dims
=
x
->
dims
();
int64_t
in_rank
=
in_dims
.
size
();
int64_t
in_rank
=
in_dims
.
size
();
...
...
paddle/fluid/operators/multinomial_op.cu
浏览文件 @
c66eec75
...
@@ -70,8 +70,31 @@ template <typename T>
...
@@ -70,8 +70,31 @@ template <typename T>
__global__
void
NormalizeProbability
(
T
*
norm_probs
,
const
T
*
in_data
,
__global__
void
NormalizeProbability
(
T
*
norm_probs
,
const
T
*
in_data
,
T
*
sum_rows
)
{
T
*
sum_rows
)
{
// int id = blockIdx.x * blockDim.x + threadIdx.x;
// int id = blockIdx.x * blockDim.x + threadIdx.x;
int
id
=
threadIdx
.
x
;
// int id = threadIdx.x;
norm_probs
[
id
]
=
in_data
[
id
]
/
sum_rows
[
0
];
int
id
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
+
blockIdx
.
y
*
gridDim
.
x
*
blockDim
.
x
;
norm_probs
[
id
]
=
in_data
[
id
]
/
sum_rows
[
blockIdx
.
y
];
}
template
<
typename
T
>
__global__
void
yokiFunc
(
const
T
*
in_data
,
T
*
out
)
{
// int id = blockIdx.x * blockDim.x + threadIdx.x;
// int id = threadIdx.x;
int
id
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
+
blockIdx
.
y
*
gridDim
.
x
*
blockDim
.
x
;
out
[
id
]
=
in_data
[
id
];
}
template
<
typename
T
>
__global__
void
Cumsum
(
T
*
norm_probs_data
,
int64_t
num_distributions
,
int64_t
num_categories
,
T
*
cumulative_probs
)
{
// int id = blockIdx.x;
for
(
int
id
=
blockIdx
.
x
;
id
<
num_distributions
;
id
+=
gridDim
.
x
)
{
thrust
::
inclusive_scan
(
thrust
::
device
,
norm_probs_data
+
id
*
num_categories
,
norm_probs_data
+
(
id
+
1
)
*
num_categories
,
cumulative_probs
+
id
*
num_categories
);
}
}
}
template
<
typename
T
>
template
<
typename
T
>
...
@@ -141,21 +164,29 @@ __global__ void sampleMultinomialWithReplacement(
...
@@ -141,21 +164,29 @@ __global__ void sampleMultinomialWithReplacement(
// global index formula for 2D grid of 1D blocks
// global index formula for 2D grid of 1D blocks
// int idx = blockIdx.y * gridDim.x * blockDim.x + blockIdx.x * blockDim.x +
// int idx = blockIdx.y * gridDim.x * blockDim.x + blockIdx.x * blockDim.x +
// threadIdx.x;
// threadIdx.x;
int
idx
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
for
(
int
sample
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
// int idx = blockIdx.x * blockDim.x + threadIdx.x;
sample
<
totalSamples
;
sample
+=
blockDim
.
x
*
gridDim
.
x
)
{
// we are losing 3 out of 4 generated numbers but it's ok
// this kernel is not very efficient anyway
// T uniform_random = dist(rng);
int
idx
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
+
T
uniform_random
=
rng
[
sample
]
;
blockIdx
.
y
*
gridDim
.
x
*
blockDim
.
x
;
// Find the bucket that a uniform sample lies in
for
(
int
curDist
=
blockIdx
.
y
;
curDist
<
distributions
;
int
choice
=
binarySearchForMultinomial
<
T
>
(
normDistPrefixSum
,
normDist
,
curDist
+=
gridDim
.
y
)
{
categories
,
uniform_random
);
for
(
int
sample
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
sample
<
totalSamples
;
sample
+=
blockDim
.
x
*
gridDim
.
x
)
{
// we are losing 3 out of 4 generated numbers but it's ok
// this kernel is not very efficient anyway
dest
[
sample
]
=
choice
;
// T uniform_random = dist(rng);
T
uniform_random
=
rng
[
sample
+
curDist
*
totalSamples
];
// Find the bucket that a uniform sample lies in
int
choice
=
binarySearchForMultinomial
<
T
>
(
normDistPrefixSum
+
curDist
*
categories
,
normDist
+
curDist
*
categories
,
categories
,
uniform_random
);
dest
[
sample
+
curDist
*
totalSamples
]
=
choice
;
}
}
}
}
}
...
@@ -167,17 +198,48 @@ class MultinomialOpKernel<platform::CUDADeviceContext, T>
...
@@ -167,17 +198,48 @@ class MultinomialOpKernel<platform::CUDADeviceContext, T>
const
auto
x
=
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
);
const
auto
x
=
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Out"
);
auto
out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Out"
);
// auto yokiout = ctx.Output<framework::Tensor>("yokiOut");
const
int64_t
num_samples
=
ctx
.
Attr
<
int
>
(
"num_samples"
);
const
int64_t
num_samples
=
ctx
.
Attr
<
int
>
(
"num_samples"
);
const
bool
replacement
=
ctx
.
Attr
<
bool
>
(
"replacement"
);
const
bool
replacement
=
ctx
.
Attr
<
bool
>
(
"replacement"
);
auto
*
in_data
=
x
->
data
<
T
>
();
auto
*
in_data
=
x
->
data
<
T
>
();
auto
*
out_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
*
out_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
// auto* yokiout_data = yokiout->mutable_data<T>(ctx.GetPlace());
auto
in_dims
=
x
->
dims
();
auto
in_dims
=
x
->
dims
();
int64_t
in_rank
=
in_dims
.
size
();
int64_t
in_rank
=
in_dims
.
size
();
const
int64_t
num_categories
=
in_dims
[
in_rank
-
1
];
const
int64_t
num_categories
=
in_dims
[
in_rank
-
1
];
const
int64_t
num_distributions
=
in_rank
>
1
?
in_dims
[
in_rank
-
2
]
:
1
;
const
int64_t
num_distributions
=
in_rank
>
1
?
in_dims
[
in_rank
-
2
]
:
1
;
if
(
!
replacement
)
{
int
in_data_numel
=
x
->
numel
();
int
out_data_numel
=
out
->
numel
();
// std::vector<T> cpu_in_data(in_data_numel);
// std::vector<T> cpu_out_data(out_data_numel);
// T cpu_in_data[in_data_numel];
// T cpu_out_data[out_data_numel];
T
*
cpu_in_data
=
new
T
[
in_data_numel
];
T
*
cpu_out_data
=
new
T
[
out_data_numel
];
cudaMemcpy
(
cpu_in_data
,
in_data
,
in_data_numel
*
sizeof
(
T
),
cudaMemcpyDeviceToHost
);
VLOG
(
3
)
<<
"Print cpu_in_data "
<<
cpu_in_data
[
0
]
<<
"
\n
"
;
VLOG
(
3
)
<<
"Print in_data_numel "
<<
in_data_numel
<<
"
\n
"
;
VLOG
(
3
)
<<
"Print out_data_numel "
<<
out_data_numel
<<
"
\n
"
;
MultinomialFunctor
<
T
>
(
cpu_out_data
,
cpu_in_data
,
num_samples
,
replacement
,
num_categories
,
num_distributions
);
cudaMemcpy
(
out_data
,
cpu_out_data
,
out_data_numel
*
sizeof
(
T
),
cudaMemcpyHostToDevice
);
delete
[]
cpu_in_data
;
delete
[]
cpu_out_data
;
return
;
}
// std::vector<T> sum_rows(num_distributions);
// std::vector<T> sum_rows(num_distributions);
// SumArrayCUDAKernel<T>(in_data, sum_rows,)
// SumArrayCUDAKernel<T>(in_data, sum_rows,)
...
@@ -188,30 +250,44 @@ class MultinomialOpKernel<platform::CUDADeviceContext, T>
...
@@ -188,30 +250,44 @@ class MultinomialOpKernel<platform::CUDADeviceContext, T>
VLOG
(
3
)
<<
"Print in_rank "
<<
in_rank
<<
"
\n
"
;
VLOG
(
3
)
<<
"Print in_rank "
<<
in_rank
<<
"
\n
"
;
framework
::
Tensor
sum_rows_t
;
framework
::
Tensor
sum_rows_t
;
auto
*
sum_rows_data
=
sum_rows_t
.
mutable_data
<
T
>
({
1
},
ctx
.
GetPlace
());
auto
*
sum_rows_data
=
sum_rows_t
.
mutable_data
<
T
>
({
num_distributions
},
ctx
.
GetPlace
());
// auto* sum_rows_data =
// auto* sum_rows_data =
// sum_rows_t->mutable_data<T>(framework::make_ddim({1}), ctx.GetPlace());
// sum_rows_t->mutable_data<T>(framework::make_ddim({num_distributions}),
// ctx.GetPlace());
auto
&
place
=
*
ctx
.
template
device_context
<
platform
::
CUDADeviceContext
>()
auto
&
place
=
*
ctx
.
template
device_context
<
platform
::
CUDADeviceContext
>()
.
eigen_device
();
.
eigen_device
();
auto
eigen_input
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
x
);
if
(
num_distributions
==
1
)
{
// auto eigen_sum_rows = framework::EigenVector<T>::From(sum_rows_t);
auto
eigen_input
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
x
);
auto
eigen_sum_rows
=
framework
::
EigenScalar
<
T
>::
From
(
sum_rows_t
);
auto
eigen_sum_rows
=
framework
::
EigenVector
<
T
>::
From
(
sum_rows_t
);
eigen_sum_rows
.
device
(
place
)
=
// auto eigen_sum_rows = framework::EigenScalar<T>::From(sum_rows_t);
eigen_input
.
sum
(
Eigen
::
DSizes
<
int
,
1
>
(
0
))
eigen_sum_rows
.
device
(
place
)
=
.
eval
()
eigen_input
.
sum
(
Eigen
::
DSizes
<
int
,
1
>
(
1
))
.
reshape
(
Eigen
::
DSizes
<
int
,
1
>
(
sum_rows_t
.
dims
()[
0
]));
.
eval
()
// eigen_sum_rows.device(place) =
.
reshape
(
Eigen
::
DSizes
<
int
,
1
>
(
sum_rows_t
.
dims
()[
0
]));
// eigen_input.sum().eval().reshape(Eigen::DSizes<int, 1>(1));
}
else
{
auto
eigen_input
=
framework
::
EigenMatrix
<
T
>::
From
(
*
x
);
dim3
grid
(
num_distributions
);
// auto eigen_sum_rows = framework::EigenVector<T>::From(sum_rows_t);
dim3
block
(
num_categories
);
auto
eigen_sum_rows
=
framework
::
EigenVector
<
T
>::
From
(
sum_rows_t
);
eigen_sum_rows
.
device
(
place
)
=
eigen_input
.
sum
(
Eigen
::
DSizes
<
int
,
1
>
(
1
));
// .eval()
// .reshape(Eigen::DSizes<int, 1>(sum_rows_t.dims()[0]));
// eigen_sum_rows.device(place) =
// eigen_input.sum().eval().reshape(Eigen::DSizes<int, 1>(1));
}
// std::vector<T> in_data_norm(num_categories);
// std::vector<T> in_data_norm(num_categories);
framework
::
Tensor
norm_probs_t
;
framework
::
Tensor
norm_probs_t
;
auto
*
norm_probs_data
=
auto
*
norm_probs_data
=
norm_probs_t
.
mutable_data
<
T
>
(
norm_probs_t
.
mutable_data
<
T
>
({
num_categories
},
ctx
.
GetPlace
());
{
num_distributions
,
num_categories
},
ctx
.
GetPlace
());
// dim3 grid(num_distributions);
// dim3 block(num_categories);
dim3
block
(
num_categories
<
512
?
num_categories
:
512
);
dim3
grid
((
num_categories
-
1
)
/
block
.
x
+
1
,
num_distributions
);
NormalizeProbability
<
NormalizeProbability
<
T
><<<
grid
,
block
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
T
><<<
grid
,
block
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
norm_probs_data
,
in_data
,
sum_rows_data
);
norm_probs_data
,
in_data
,
sum_rows_data
);
...
@@ -219,43 +295,46 @@ class MultinomialOpKernel<platform::CUDADeviceContext, T>
...
@@ -219,43 +295,46 @@ class MultinomialOpKernel<platform::CUDADeviceContext, T>
// num_distributions can only be 1.
// num_distributions can only be 1.
// std::vector<T> cumulative_probs(num_categories);
// std::vector<T> cumulative_probs(num_categories);
framework
::
Tensor
cumulative_probs_t
;
framework
::
Tensor
cumulative_probs_t
;
auto
*
cumulative_probs
=
auto
*
cumulative_probs
=
cumulative_probs_t
.
mutable_data
<
T
>
(
cumulative_probs_t
.
mutable_data
<
T
>
({
num_categories
},
ctx
.
GetPlace
());
{
num_distributions
,
num_categories
},
ctx
.
GetPlace
());
// T cumulative_probs[num_categories];
// T cumulative_probs[num_categories];
int64_t
size
=
num_categories
;
dim3
block1
(
1
);
thrust
::
inclusive_scan
(
thrust
::
device
,
norm_probs_data
,
dim3
grid1
(
num_distributions
);
norm_probs_data
+
num_categories
,
cumulative_probs
);
Cumsum
<
T
><<<
grid1
,
block1
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
norm_probs_data
,
num_distributions
,
num_categories
,
cumulative_probs
);
/*
dim3 block2(num_categories < 512 ? num_categories : 512);
dim3 grid2((num_categories-1)/block2.x+1, num_distributions);
yokiFunc<T><<<grid2, block2, 0, ctx.cuda_device_context().stream()>>>(
cumulative_probs, yokiout_data);*/
// int64_t size = num_categories;
// thrust::inclusive_scan(thrust::device, norm_probs_data,
// norm_probs_data + num_categories,
// cumulative_probs);
VLOG
(
3
)
<<
"Print cumsum "
<<
cumulative_probs
<<
"
\n
"
;
if
(
replacement
)
{
if
(
replacement
)
{
dim3
block
(
128
);
dim3
block
(
128
);
// int grid_y = 1;
// int grid_y = 1;
dim3
grid
((
num_samples
-
1
)
/
block
.
x
+
1
);
dim3
grid
((
num_samples
-
1
)
/
block
.
x
+
1
,
num_distributions
);
/*
// std::vector<T> rng(num_samples);
T rng[num_samples];
std::uniform_real_distribution<T> dist(0, 1);
auto gen_ptr = framework::DefaultCPUGenerator();
auto engine = gen_ptr->GetCPUEngine();
for (int s = 0; s < num_samples; s++) {
rng[s] = dist(*engine);
}
*/
std
::
random_device
rd
;
std
::
random_device
rd
;
auto
seed
=
rd
();
auto
seed
=
rd
();
framework
::
Tensor
rng_data_t
;
framework
::
Tensor
rng_data_t
;
auto
*
rng_data
=
auto
*
rng_data
=
rng_data_t
.
mutable_data
<
T
>
(
rng_data_t
.
mutable_data
<
T
>
({
num_samples
},
ctx
.
GetPlace
());
{
num_distributions
,
num_samples
},
ctx
.
GetPlace
());
thrust
::
counting_iterator
<
unsigned
int
>
index_sequence_begin
(
0
);
thrust
::
counting_iterator
<
unsigned
int
>
index_sequence_begin
(
0
);
platform
::
Transform
<
platform
::
CUDADeviceContext
>
trans
;
platform
::
Transform
<
platform
::
CUDADeviceContext
>
trans
;
auto
*
context
=
static_cast
<
const
platform
::
CUDADeviceContext
*>
(
auto
*
context
=
static_cast
<
const
platform
::
CUDADeviceContext
*>
(
&
ctx
.
device_context
());
&
ctx
.
device_context
());
trans
(
*
context
,
index_sequence_begin
,
index_sequence_begin
+
num_samples
,
trans
(
*
context
,
index_sequence_begin
,
rng_data
,
RandomGeneratorCudaFunctor
<
T
>
(
seed
));
index_sequence_begin
+
num_distributions
*
num_samples
,
rng_data
,
RandomGeneratorCudaFunctor
<
T
>
(
seed
));
VLOG
(
3
)
<<
"Print enter
\n
"
;
VLOG
(
3
)
<<
"Print enter
\n
"
;
// VLOG(3) << "Print size in_data " <<
// VLOG(3) << "Print size in_data " <<
...
@@ -267,8 +346,12 @@ class MultinomialOpKernel<platform::CUDADeviceContext, T>
...
@@ -267,8 +346,12 @@ class MultinomialOpKernel<platform::CUDADeviceContext, T>
T
><<<
grid
,
block
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
T
><<<
grid
,
block
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
rng_data
,
num_samples
,
out_data
,
num_distributions
,
num_categories
,
rng_data
,
num_samples
,
out_data
,
num_distributions
,
num_categories
,
cumulative_probs
,
norm_probs_data
);
cumulative_probs
,
norm_probs_data
);
VLOG
(
3
)
<<
"Print end
\n
"
<<
out_data
;
}
}
VLOG
(
3
)
<<
"Print final end
\n
"
;
// MultinomialCudaFunctor<T>(out_data, in_data, num_samples, replacement,
// MultinomialCudaFunctor<T>(out_data, in_data, num_samples, replacement,
// num_categories, num_distributions);
// num_categories, num_distributions);
}
}
...
...
python/paddle/fluid/tests/unittests/test_multinomial_op.py
浏览文件 @
c66eec75
...
@@ -38,6 +38,7 @@ class TestMultinomialOp(OpTest):
...
@@ -38,6 +38,7 @@ class TestMultinomialOp(OpTest):
# input probability is a vector, and replacement is True
# input probability is a vector, and replacement is True
self
.
input_np
=
np
.
random
.
rand
(
4
)
self
.
input_np
=
np
.
random
.
rand
(
4
)
self
.
outputs
=
{
"Out"
:
np
.
zeros
(
100000
).
astype
(
"int64"
)}
self
.
outputs
=
{
"Out"
:
np
.
zeros
(
100000
).
astype
(
"int64"
)}
# self.outputs = {"yokiOut": np.zeros(4).astype("int64")}
self
.
attrs
=
{
"num_samples"
:
100000
,
"replacement"
:
True
}
self
.
attrs
=
{
"num_samples"
:
100000
,
"replacement"
:
True
}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
...
@@ -53,19 +54,21 @@ class TestMultinomialOp(OpTest):
...
@@ -53,19 +54,21 @@ class TestMultinomialOp(OpTest):
# normalize the input to get the probability
# normalize the input to get the probability
prob
=
self
.
input_np
/
self
.
input_np
.
sum
(
axis
=-
1
,
keepdims
=
True
)
prob
=
self
.
input_np
/
self
.
input_np
.
sum
(
axis
=-
1
,
keepdims
=
True
)
sample_prob
=
self
.
sample_output
(
np
.
array
(
outs
[
0
]))
sample_prob
=
self
.
sample_output
(
np
.
array
(
outs
[
0
]))
print
(
"sample_prob: "
+
str
(
sample_prob
)
+
"
\n
prob: "
+
str
(
prob
))
# sample_prob = np.array(outs[0])
# print("input", self.input_np)
# print("sample_prob: " + str(sample_prob) + "\nprob: " + str(prob))
self
.
assertTrue
(
self
.
assertTrue
(
np
.
allclose
(
np
.
allclose
(
sample_prob
,
prob
,
rtol
=
0
,
atol
=
0.01
),
sample_prob
,
prob
,
rtol
=
0
,
atol
=
0.01
),
"sample_prob: "
+
str
(
sample_prob
)
+
"
\n
prob: "
+
str
(
prob
))
"sample_prob: "
+
str
(
sample_prob
)
+
"
\n
prob: "
+
str
(
prob
))
"""
class
TestMultinomialOp2
(
TestMultinomialOp
):
class
TestMultinomialOp2
(
TestMultinomialOp
):
def
init_data
(
self
):
def
init_data
(
self
):
# input probability is a matrix
# input probability is a matrix
self
.
input_np
=
np
.
random
.
rand
(
3
,
4
)
self
.
input_np
=
np
.
random
.
rand
(
3
,
4
)
self
.
outputs
=
{
"Out"
:
np
.
zeros
((
3
,
100000
)).
astype
(
"int64"
)}
self
.
outputs
=
{
"Out"
:
np
.
zeros
((
3
,
100000
)).
astype
(
"int64"
)}
# self.outputs = {"yokiOut": np.zeros((3, 4)).astype("int64")}
self
.
attrs
=
{
"num_samples"
:
100000
,
"replacement"
:
True
}
self
.
attrs
=
{
"num_samples"
:
100000
,
"replacement"
:
True
}
def
sample_output
(
self
,
out
):
def
sample_output
(
self
,
out
):
...
@@ -88,11 +91,13 @@ class TestMultinomialOp3(TestMultinomialOp):
...
@@ -88,11 +91,13 @@ class TestMultinomialOp3(TestMultinomialOp):
def
verify_output
(
self
,
outs
):
def
verify_output
(
self
,
outs
):
out
=
np
.
array
(
outs
[
0
])
out
=
np
.
array
(
outs
[
0
])
# print("op3out", out)
unique_out
=
np
.
unique
(
out
)
unique_out
=
np
.
unique
(
out
)
self
.
assertEqual
(
self
.
assertEqual
(
len
(
unique_out
),
100
,
len
(
unique_out
),
100
,
"replacement is False. categories can't be sampled repeatedly"
)
"replacement is False. categories can't be sampled repeatedly"
)
"""
"""
"""
class TestReplacementError(unittest.TestCase):
class TestReplacementError(unittest.TestCase):
def init_data(self):
def init_data(self):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录