Commit bbbd75e4 (unverified)
Authored Jan 18, 2022 by zhouweiwei2014; committed via GitHub on Jan 18, 2022
Parent: a998c077

change CUDA implementation of uniform/gaussian OP (#38611)

* change CUDA implementation of uniform/gaussian OP
* fix unittest
Showing 10 changed files with 182 additions and 20 deletions (+182, -20):

paddle/fluid/operators/distribution_helper.h                        +42  -2
paddle/fluid/operators/gaussian_random_op.cu                        +21  -7
paddle/fluid/operators/uniform_random_op.cu                         +23  -8
paddle/fluid/platform/flags.cc                                       +2  -0
paddle/scripts/paddle_build.bat                                      +1  -0
paddle/scripts/paddle_build.sh                                       +2  -0
python/paddle/fluid/tests/unittests/hybrid_parallel_mp_layers.py     +1  -1
python/paddle/fluid/tests/unittests/test_adamw_op.py                 +2  -2
python/paddle/fluid/tests/unittests/test_gaussian_random_op.py      +44  -0
python/paddle/fluid/tests/unittests/test_uniform_random_op.py       +44  -0
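Taken together, the change routes the GPU uniform/gaussian random OPs through cuRAND's Philox generator whenever the new FLAGS_use_curand flag is on (the CI build scripts below turn it on by exporting the flag). A minimal usage sketch, not part of the diff itself, assuming a CUDA build of Paddle that contains this commit:

import os

# FLAGS_use_curand is the gate added in this commit; exporting it before
# importing paddle makes the uniform/gaussian GPU kernels take the
# cuRAND-based path (paddle reads FLAGS_* environment variables at startup).
os.environ['FLAGS_use_curand'] = 'True'

import paddle

paddle.set_device('gpu')
paddle.seed(2021)

# With the flag on, both OPs draw from curandStatePhilox4_32_10_t, so the
# same seed reproduces the same values (the new unit tests below pin the
# exact numbers on a V100).
u = paddle.rand([32, 3, 1024, 1024], dtype='float32')    # uniform_random OP
g = paddle.randn([32, 3, 1024, 1024], dtype='float32')   # gaussian_random OP
print(float(u.mean()), float(g.std()))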
paddle/fluid/operators/distribution_helper.h  (+42, -2)

@@ -21,17 +21,26 @@ limitations under the License. */
 #include <hiprand_kernel.h>
 #endif

+#include "paddle/fluid/framework/generator.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/for_range.h"
 #include "paddle/fluid/platform/hostdevice.h"

+#if !defined(_WIN32)
+#define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)
+#else
+// there is no equivalent intrinsics in msvc.
+#define UNLIKELY(condition) (condition)
+#endif
+
 namespace paddle {
 namespace distribution {

 using Tensor = framework::Tensor;

+/********************* Transformation Function **********************/
 template <typename T>
 struct exponential_transform {
   explicit exponential_transform(T lambda) : lambda_(lambda) {}

@@ -52,7 +61,37 @@ struct exponential_transform {
   T lambda_;
 };

+template <typename T>
+struct uniform_transform {
+  explicit uniform_transform(T min, T max) : range_(max - min), min_(min) {}
+
+  HOSTDEVICE inline T operator()(T val) const {
+    if (UNLIKELY(val == static_cast<T>(1.0))) {
+      return min_;
+    } else {
+      return val * range_ + min_;
+    }
+  }
+
+ private:
+  T range_;
+  T min_;
+};
+
+template <typename T>
+struct normal_transform {
+  explicit normal_transform(T mean, T std) : mean_(mean), std_(std) {}
+
+  HOSTDEVICE inline T operator()(T val) const { return val * std_ + mean_; }
+
+ private:
+  T mean_;
+  T std_;
+};
+
 #if defined(__NVCC__) || defined(__HIPCC__)
+/*********************** Distribution Function *************************/
 template <typename T>
 struct uniform_distribution;

@@ -132,6 +171,7 @@ struct normal_distribution<double> {
 };
 #endif

+/******** Launch GPU function of distribution and transformation *********/
 template <typename T, typename DistOp, typename TransformOp>
 __global__ void DistributionKernel(size_t size, uint64_t seed, uint64_t offset,
                                    DistOp dist, TransformOp trans,

@@ -151,8 +191,8 @@ __global__ void DistributionKernel(size_t size, uint64_t seed, uint64_t offset,
   for (size_t j = 0; j < returns_count; j++) {
     size_t index = i + j * total_thread;
     if (index < size) {
-      auto random = static_cast<T>((&random_tuple.x)[j]);
-      out_data[index] = trans(random);
+      auto random = (&random_tuple.x)[j];
+      out_data[index] = static_cast<T>(trans(random));
     }
   }
 }
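The two new device functors are thin affine transforms over the raw cuRAND draws: curand_uniform returns values in (0, 1], so uniform_transform folds the 1.0 endpoint back to min to keep results inside [min, max), and normal_transform rescales a standard-normal sample. A rough NumPy illustration of the same arithmetic (this is only the math the functors apply, not Paddle API):

import numpy as np

def uniform_transform(val, low, high):
    # Mirrors the device functor: val in (0, 1] is mapped into [low, high).
    # curand_uniform can return exactly 1.0, which is folded back to low.
    return np.where(val == 1.0, low, val * (high - low) + low)

def normal_transform(val, mean, std):
    # Mirrors the device functor: standard-normal draw -> N(mean, std^2).
    return val * std + mean

raw = np.array([1.0, 0.25, 0.5])                         # raw draws in (0, 1]
print(uniform_transform(raw, -1.0, 1.0))                 # [-1.  -0.5  0. ]
print(normal_transform(np.array([0.0, 1.0]), 2.0, 3.0))  # [2. 5.]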
paddle/fluid/operators/gaussian_random_op.cu  (+21, -7)

@@ -19,8 +19,11 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/operators/amp/fp16_type_traits.h"
+#include "paddle/fluid/operators/distribution_helper.h"
 #include "paddle/fluid/operators/fill_constant_op.h"

+DECLARE_bool(use_curand);
+
 namespace paddle {
 namespace operators {

@@ -65,7 +68,10 @@ class GPUGaussianRandomKernel : public framework::OpKernel<T> {
     thrust::counting_iterator<int64_t> index_sequence_begin(0);
     auto shape = GetShape(context);
     tensor->Resize(shape);
-    T* data = tensor->mutable_data<T>(context.GetPlace());
+    auto& dev_cxt =
+        context.template device_context<platform::CUDADeviceContext>();
+    T* data = tensor->mutable_data<T>(dev_cxt.GetPlace());

     int64_t size = tensor->numel();

@@ -73,12 +79,20 @@ class GPUGaussianRandomKernel : public framework::OpKernel<T> {
     auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id);
     if (gen_cuda->GetIsInitPy() && seed_flag) {
-      auto seed_offset = gen_cuda->IncrementOffset(1);
-      int64_t gen_offset = size * seed_offset.second;
-      thrust::transform(
-          index_sequence_begin, index_sequence_begin + size,
-          thrust::device_ptr<T>(data),
-          GaussianGenerator<T>(mean, std, seed_offset.first, gen_offset));
+      if (FLAGS_use_curand) {
+        using MT = typename details::MPTypeTrait<T>::Type;
+        distribution::normal_distribution<MT> dist;
+        distribution::normal_transform<MT> trans(mean, std);
+        distribution::distribution_and_transform<T>(dev_cxt, tensor, dist,
+                                                    trans);
+      } else {
+        auto seed_offset = gen_cuda->IncrementOffset(1);
+        int64_t gen_offset = size * seed_offset.second;
+        thrust::transform(
+            index_sequence_begin, index_sequence_begin + size,
+            thrust::device_ptr<T>(data),
+            GaussianGenerator<T>(mean, std, seed_offset.first, gen_offset));
+      }
     } else {
       thrust::transform(index_sequence_begin, index_sequence_begin + size,
                         thrust::device_ptr<T>(data),
paddle/fluid/operators/uniform_random_op.cu  (+23, -8)

@@ -18,7 +18,12 @@ limitations under the License. */
 #include "paddle/fluid/framework/generator.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/operators/amp/fp16_type_traits.h"
+#include "paddle/fluid/operators/distribution_helper.h"
 #include "paddle/fluid/operators/uniform_random_op.h"

+DECLARE_bool(use_curand);
+
 namespace paddle {
 namespace operators {

@@ -123,7 +128,9 @@ class GPUUniformRandomKernel : public framework::OpKernel<T> {
                             "unsupport type: %s.",
                             framework::ToTypeName(out_var->Type())));
     }
-    T* data = tensor->mutable_data<T>(context.GetPlace());
+    auto& dev_cxt =
+        context.template device_context<platform::CUDADeviceContext>();
+    T* data = tensor->mutable_data<T>(dev_cxt.GetPlace());
     unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
     bool seed_flag = false;
     if (seed == 0) {

@@ -144,13 +151,21 @@ class GPUUniformRandomKernel : public framework::OpKernel<T> {
     int device_id = context.GetPlace().GetDeviceId();
     auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id);
     if (gen_cuda->GetIsInitPy() && seed_flag) {
-      auto seed_offset = gen_cuda->IncrementOffset(1);
-      int64_t gen_offset = size * seed_offset.second;
-      thrust::transform(
-          index_sequence_begin, index_sequence_begin + size,
-          thrust::device_ptr<T>(data),
-          UniformGeneratorOffset<T>(min, max, seed_offset.first, diag_num,
-                                    diag_step, diag_val, gen_offset));
+      if (FLAGS_use_curand) {
+        using MT = typename details::MPTypeTrait<T>::Type;
+        distribution::uniform_distribution<MT> dist;
+        distribution::uniform_transform<MT> trans(min, max);
+        distribution::distribution_and_transform<T>(dev_cxt, tensor, dist,
+                                                    trans);
+      } else {
+        auto seed_offset = gen_cuda->IncrementOffset(1);
+        int64_t gen_offset = size * seed_offset.second;
+        thrust::transform(
+            index_sequence_begin, index_sequence_begin + size,
+            thrust::device_ptr<T>(data),
+            UniformGeneratorOffset<T>(min, max, seed_offset.first, diag_num,
+                                      diag_step, diag_val, gen_offset));
+      }
     } else {
       thrust::transform(
           index_sequence_begin, index_sequence_begin + size,
paddle/fluid/platform/flags.cc  (+2, -0)

@@ -545,6 +545,8 @@ PADDLE_DEFINE_EXPORTED_double(
  */
 PADDLE_DEFINE_EXPORTED_bool(use_mkldnn, false, "Use MKLDNN to run");

+PADDLE_DEFINE_EXPORTED_bool(use_curand, false, "Random OP use CURAND");
+
 /**
  * Debug related FLAG
  * Name: FLAGS_call_stack_level
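Because the flag is registered with PADDLE_DEFINE_EXPORTED_bool, it should also be visible to Paddle's Python-side flag helpers, so it can be flipped at runtime instead of through the FLAGS_use_curand environment variable used by the CI scripts. A small sketch under that assumption:

import paddle

# Exported flags are readable and writable from Python; FLAGS_use_curand
# defaults to False, matching the definition above.
print(paddle.get_flags(['FLAGS_use_curand']))
paddle.set_flags({'FLAGS_use_curand': True})
print(paddle.get_flags(['FLAGS_use_curand']))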
paddle/scripts/paddle_build.bat  (+1, -0)

@@ -662,6 +662,7 @@ for /F %%# in ('wmic os get localdatetime^|findstr 20') do set start=%%#
 set start=%start:~4,10%
 set FLAGS_call_stack_level=2
+set FLAGS_use_curand=True
 dir %THIRD_PARTY_PATH:/=\%\install\openblas\lib
 dir %THIRD_PARTY_PATH:/=\%\install\openblas\bin
 dir %THIRD_PARTY_PATH:/=\%\install\zlib\bin
paddle/scripts/paddle_build.sh  (+2, -0)

@@ -61,6 +61,8 @@ function init() {
     # NOTE(chenweihang): For easy debugging, CI displays the C++ error stacktrace by default
     export FLAGS_call_stack_level=2
+    export FLAGS_use_curand=True
+
     # set CI_SKIP_CPP_TEST if only *.py changed
     # In order to avoid using in some CI(such as daily performance), the current
     # branch must not be `${BRANCH}` which is usually develop.
python/paddle/fluid/tests/unittests/hybrid_parallel_mp_layers.py  (+1, -1)

@@ -274,7 +274,7 @@ class TestDistTraning(unittest.TestCase):
         seq_length = 16
         class_size_per_card = 2
         vocab_size = class_size_per_card * self.model_parallel_size
-        seed = 1025
+        seed = 100
         set_random_seed(seed)
         rank_id = dist.get_rank()
python/paddle/fluid/tests/unittests/test_adamw_op.py  (+2, -2)

@@ -333,7 +333,7 @@ class TestAdamWOpLayerwiseLR(TestAdamWOp):
             lr_ratio=simple_lr_fun)

         loss_ref = np.array(
-            [4.8383293, 3.084947, 1.3323904, -0.41943002, -2.1710064])
+            [-1.7267396, -2.81524, -3.9250019, -5.05954, -6.2272625])
         for i in range(5):
             a1 = linear1(a)
             out = linear2(a1)

@@ -379,7 +379,7 @@ class TestAdamWOpLayerwiseLR(TestAdamWOp):
         exe.run(startup)

         loss_ref = np.array(
-            [0.36120513, 0.2720821, 0.67208904, 0.14607805, 0.24098626])
+            [0.33895183, 0.3159437, 0.19472016, 0.17764759, 0.1520702])
         for i in range(5):
             inputs = np.random.random(size=[8, 10]).astype('float32')
             outputs = np.random.random(size=[8, 1]).astype('float32')
python/paddle/fluid/tests/unittests/test_gaussian_random_op.py  (+44, -0)

@@ -287,5 +287,49 @@ class TestStandardNormalDtype(unittest.TestCase):
         paddle.enable_static()


+class TestRandomValue(unittest.TestCase):
+    def test_fixed_random_number(self):
+        # Test GPU Fixed random number, which is generated by 'curandStatePhilox4_32_10_t'
+        if not paddle.is_compiled_with_cuda():
+            return
+
+        # Note(zhouwei): The Number of threads is determined by
+        # 'multiProcessorCount * maxThreadsPerMultiProcessor'. So, different
+        # GPU have different number of threads, which result in different
+        # random value. Only test on V100 GPU here.
+        if not "V100" in paddle.device.cuda.get_device_name():
+            return
+
+        def _check_random_value(dtype, expect, expect_mean, expect_std):
+            x = paddle.randn([32, 3, 1024, 1024], dtype=dtype)
+            actual = x.numpy()
+            self.assertTrue(np.allclose(actual[2, 1, 512, 1000:1010], expect))
+            self.assertTrue(np.mean(actual), expect_mean)
+            self.assertTrue(np.std(actual), expect_std)
+
+        print("Test Fixed Random number on V100 GPU------>")
+        paddle.disable_static()
+        paddle.set_device('gpu')
+        paddle.seed(2021)
+        expect = [
+            -0.79037829, -0.54411126, -0.32266671, 0.35791815, 1.44169267,
+            -0.87785644, -1.23909874, -2.18194139, 0.49489656, 0.40703062
+        ]
+        expect_mean = -0.0000053026194133403266873214888799115129813799285329878330230713
+        expect_std = 0.99999191058126390974081232343451119959354400634765625
+        _check_random_value(core.VarDesc.VarType.FP64, expect, expect_mean,
+                            expect_std)
+
+        expect = [
+            -0.7988942, 1.8644791, 0.02782744, 1.3692524, 0.6419724,
+            0.12436751, 0.12058455, -1.9984808, 1.5635862, 0.18506318
+        ]
+        expect_mean = -0.00004762359094456769526004791259765625
+        expect_std = 0.999975681304931640625
+        _check_random_value(core.VarDesc.VarType.FP32, expect, expect_mean,
+                            expect_std)
+        paddle.enable_static()
+
+
 if __name__ == "__main__":
     unittest.main()
python/paddle/fluid/tests/unittests/test_uniform_random_op.py  (+44, -0)

@@ -562,5 +562,49 @@ class TestUniformDtype(unittest.TestCase):
         paddle.enable_static()


+class TestRandomValue(unittest.TestCase):
+    def test_fixed_random_number(self):
+        # Test GPU Fixed random number, which is generated by 'curandStatePhilox4_32_10_t'
+        if not paddle.is_compiled_with_cuda():
+            return
+
+        # Note(zhouwei): The Number of threads is determined by
+        # 'multiProcessorCount * maxThreadsPerMultiProcessor'. So, different
+        # GPU have different number of threads, which result in different
+        # random value. Only test on V100 GPU here.
+        if not "V100" in paddle.device.cuda.get_device_name():
+            return
+
+        def _check_random_value(dtype, expect, expect_mean, expect_std):
+            x = paddle.rand([32, 3, 1024, 1024], dtype=dtype)
+            actual = x.numpy()
+            self.assertTrue(np.allclose(actual[2, 1, 512, 1000:1010], expect))
+            self.assertEqual(np.mean(actual), expect_mean)
+            self.assertEqual(np.std(actual), expect_std)
+
+        print("Test Fixed Random number on V100 GPU------>")
+        paddle.disable_static()
+        paddle.set_device('gpu')
+        paddle.seed(2021)
+        expect = [
+            0.55298901, 0.65184678, 0.49375412, 0.57943639, 0.16459608,
+            0.67181056, 0.03021481, 0.0238559, 0.07742096, 0.55972187
+        ]
+        expect_mean = 0.50000454338820143895816272561205551028251647949218750
+        expect_std = 0.28867379167297479991560749112977646291255950927734375
+        _check_random_value(core.VarDesc.VarType.FP64, expect, expect_mean,
+                            expect_std)
+
+        expect = [
+            0.45320973, 0.17582087, 0.725341, 0.30849215, 0.622257,
+            0.46352342, 0.97228295, 0.12771158, 0.286525, 0.9810645
+        ]
+        expect_mean = 0.50002604722976684570312500
+        expect_std = 0.2886914908885955810546875
+        _check_random_value(core.VarDesc.VarType.FP32, expect, expect_mean,
+                            expect_std)
+        paddle.enable_static()
+
+
 if __name__ == "__main__":
     unittest.main()