Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
9714878c
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
9714878c
编写于
4月 07, 2022
作者:
zhouweiwei2014
提交者:
GitHub
4月 07, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
remove FLAGS_use_curand and change all random op CUDA implementation (#41308)
上级
0d642d3a
变更
26
显示空白变更内容
内联
并排
Showing
26 changed file
with
267 addition
and
625 deletion
+267
-625
paddle/fluid/operators/dropout_impl.cu.h
paddle/fluid/operators/dropout_impl.cu.h
+40
-111
paddle/fluid/operators/gaussian_random_op.cu
paddle/fluid/operators/gaussian_random_op.cu
+0
-7
paddle/fluid/operators/uniform_random_op.h
paddle/fluid/operators/uniform_random_op.h
+4
-50
paddle/fluid/platform/flags.cc
paddle/fluid/platform/flags.cc
+0
-2
paddle/phi/kernels/cpu/transpose_kernel.cc
paddle/phi/kernels/cpu/transpose_kernel.cc
+1
-0
paddle/phi/kernels/gpu/bernoulli_kernel.cu
paddle/phi/kernels/gpu/bernoulli_kernel.cu
+8
-51
paddle/phi/kernels/gpu/gaussian_random_kernel.cu
paddle/phi/kernels/gpu/gaussian_random_kernel.cu
+5
-20
paddle/phi/kernels/gpu/multinomial_kernel.cu
paddle/phi/kernels/gpu/multinomial_kernel.cu
+57
-156
paddle/phi/kernels/gpu/randint_kernel.cu
paddle/phi/kernels/gpu/randint_kernel.cu
+3
-33
paddle/phi/kernels/gpu/randperm_kernel.cu
paddle/phi/kernels/gpu/randperm_kernel.cu
+59
-85
paddle/phi/kernels/gpu/uniform_random_kernel.cu
paddle/phi/kernels/gpu/uniform_random_kernel.cu
+5
-56
paddle/scripts/paddle_build.bat
paddle/scripts/paddle_build.bat
+0
-1
paddle/scripts/paddle_build.sh
paddle/scripts/paddle_build.sh
+0
-2
python/paddle/fluid/initializer.py
python/paddle/fluid/initializer.py
+8
-8
python/paddle/fluid/tests/unittests/test_bernoulli_op.py
python/paddle/fluid/tests/unittests/test_bernoulli_op.py
+0
-3
python/paddle/fluid/tests/unittests/test_dropout_op.py
python/paddle/fluid/tests/unittests/test_dropout_op.py
+0
-3
python/paddle/fluid/tests/unittests/test_exponential_op.py
python/paddle/fluid/tests/unittests/test_exponential_op.py
+0
-3
python/paddle/fluid/tests/unittests/test_gaussian_random_op.py
...n/paddle/fluid/tests/unittests/test_gaussian_random_op.py
+0
-3
python/paddle/fluid/tests/unittests/test_linear.py
python/paddle/fluid/tests/unittests/test_linear.py
+16
-0
python/paddle/fluid/tests/unittests/test_multinomial_op.py
python/paddle/fluid/tests/unittests/test_multinomial_op.py
+0
-3
python/paddle/fluid/tests/unittests/test_poisson_op.py
python/paddle/fluid/tests/unittests/test_poisson_op.py
+0
-3
python/paddle/fluid/tests/unittests/test_randint_op.py
python/paddle/fluid/tests/unittests/test_randint_op.py
+0
-3
python/paddle/fluid/tests/unittests/test_randperm_op.py
python/paddle/fluid/tests/unittests/test_randperm_op.py
+0
-3
python/paddle/fluid/tests/unittests/test_uniform_random_op.py
...on/paddle/fluid/tests/unittests/test_uniform_random_op.py
+27
-18
python/paddle/nn/utils/__init__.py
python/paddle/nn/utils/__init__.py
+1
-1
python/paddle/nn/utils/transform_parameters.py
python/paddle/nn/utils/transform_parameters.py
+33
-0
未找到文件。
paddle/fluid/operators/dropout_impl.cu.h
浏览文件 @
9714878c
...
@@ -38,43 +38,9 @@ limitations under the License. */
...
@@ -38,43 +38,9 @@ limitations under the License. */
#include "paddle/phi/kernels/funcs/distribution_helper.h"
#include "paddle/phi/kernels/funcs/distribution_helper.h"
#include "paddle/phi/kernels/funcs/functors.h"
#include "paddle/phi/kernels/funcs/functors.h"
DECLARE_bool
(
use_curand
);
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
template
<
typename
T1
,
typename
T2
=
T1
,
typename
OutT
=
T1
>
struct
DstMaskGenerator
{
const
float
dropout_prob_
;
const
bool
is_upscale_in_train_
;
using
MT
=
typename
details
::
MPTypeTrait
<
T1
>::
Type
;
MT
factor
;
HOSTDEVICE
inline
DstMaskGenerator
(
const
float
dropout_prob
,
const
bool
is_upscale_in_train
)
:
dropout_prob_
(
dropout_prob
),
is_upscale_in_train_
(
is_upscale_in_train
)
{
factor
=
static_cast
<
MT
>
(
1.0
f
/
(
1.0
f
-
dropout_prob_
));
}
HOSTDEVICE
inline
void
operator
()(
OutT
*
dst
,
const
T1
*
src_val
,
const
T2
*
rand
,
int
num
)
const
{
static
constexpr
int
kCount
=
phi
::
funcs
::
uniform_distribution
<
T2
>::
kReturnsCount
;
// 0 ~ kCount -1 is dist , kCount ~ 2 * kCount - 1 is mask
#pragma unroll
for
(
int
i
=
0
;
i
<
kCount
;
i
++
)
{
if
(
rand
[
i
]
<
dropout_prob_
)
{
dst
[
i
]
=
static_cast
<
T1
>
(
0
);
dst
[
i
+
kCount
]
=
dst
[
i
];
}
else
{
dst
[
i
]
=
is_upscale_in_train_
?
static_cast
<
T1
>
(
static_cast
<
MT
>
(
src_val
[
i
])
*
factor
)
:
static_cast
<
T1
>
(
src_val
[
i
]);
dst
[
i
+
kCount
]
=
static_cast
<
T1
>
(
1
);
}
}
}
};
template
<
typename
T1
,
typename
T2
=
T1
,
typename
OutT
=
T1
>
template
<
typename
T1
,
typename
T2
=
T1
,
typename
OutT
=
T1
>
struct
DstMaskFunctor
{
struct
DstMaskFunctor
{
const
float
retain_prob_
;
const
float
retain_prob_
;
...
@@ -113,7 +79,7 @@ __global__ void VectorizedRandomGenerator(const size_t n, uint64_t seed,
...
@@ -113,7 +79,7 @@ __global__ void VectorizedRandomGenerator(const size_t n, uint64_t seed,
const
T
*
src
,
MaskType
*
mask
,
T
*
dst
,
const
T
*
src
,
MaskType
*
mask
,
T
*
dst
,
bool
is_upscale_in_train
,
bool
is_upscale_in_train
,
uint64_t
increment
,
uint64_t
increment
,
size_t
main_offset
,
bool
use_curand
)
{
size_t
main_offset
)
{
size_t
idx
=
static_cast
<
size_t
>
(
BLOCK_ID_X
*
BLOCK_NUM_X
);
size_t
idx
=
static_cast
<
size_t
>
(
BLOCK_ID_X
*
BLOCK_NUM_X
);
static
constexpr
int
kCount
=
static
constexpr
int
kCount
=
phi
::
funcs
::
uniform_distribution
<
float
>::
kReturnsCount
;
phi
::
funcs
::
uniform_distribution
<
float
>::
kReturnsCount
;
...
@@ -135,7 +101,7 @@ __global__ void VectorizedRandomGenerator(const size_t n, uint64_t seed,
...
@@ -135,7 +101,7 @@ __global__ void VectorizedRandomGenerator(const size_t n, uint64_t seed,
int
deal_size
=
BLOCK_NUM_X
*
kCount
;
int
deal_size
=
BLOCK_NUM_X
*
kCount
;
size_t
fix
=
idx
*
kCount
;
size_t
fix
=
idx
*
kCount
;
if
(
use_curand
)
{
auto
dst_functor
=
auto
dst_functor
=
DstMaskFunctor
<
T
,
float
>
(
1.0
f
-
dropout_prob
,
is_upscale_in_train
);
DstMaskFunctor
<
T
,
float
>
(
1.0
f
-
dropout_prob
,
is_upscale_in_train
);
for
(;
fix
<
main_offset
;
fix
+=
stride
)
{
for
(;
fix
<
main_offset
;
fix
+=
stride
)
{
...
@@ -145,8 +111,7 @@ __global__ void VectorizedRandomGenerator(const size_t n, uint64_t seed,
...
@@ -145,8 +111,7 @@ __global__ void VectorizedRandomGenerator(const size_t n, uint64_t seed,
// dst
// dst
kps
::
OperatorTernary
<
T
,
float
,
T
,
DstMaskFunctor
<
T
,
float
>>
(
kps
::
OperatorTernary
<
T
,
float
,
T
,
DstMaskFunctor
<
T
,
float
>>
(
&
dst_mask
[
0
],
&
dst_mask
[
0
],
&
rands
[
0
],
dst_functor
,
kCount
);
&
dst_mask
[
0
],
&
dst_mask
[
0
],
&
rands
[
0
],
dst_functor
,
kCount
);
kps
::
WriteData
<
T
,
kCount
,
1
,
1
,
false
>
(
dst
+
fix
,
&
dst_mask
[
0
],
kps
::
WriteData
<
T
,
kCount
,
1
,
1
,
false
>
(
dst
+
fix
,
&
dst_mask
[
0
],
deal_size
);
deal_size
);
// mask
// mask
kps
::
ElementwiseUnary
<
T
,
MaskType
,
kCount
,
1
,
1
,
Cast
>
(
kps
::
ElementwiseUnary
<
T
,
MaskType
,
kCount
,
1
,
1
,
Cast
>
(
&
mask_result
[
0
],
&
dst_mask
[
kCount
],
Cast
());
&
mask_result
[
0
],
&
dst_mask
[
kCount
],
Cast
());
...
@@ -172,40 +137,6 @@ __global__ void VectorizedRandomGenerator(const size_t n, uint64_t seed,
...
@@ -172,40 +137,6 @@ __global__ void VectorizedRandomGenerator(const size_t n, uint64_t seed,
remainder
);
remainder
);
__syncthreads
();
__syncthreads
();
}
}
}
else
{
auto
dst_functor
=
DstMaskGenerator
<
T
,
float
>
(
dropout_prob
,
is_upscale_in_train
);
for
(;
fix
<
main_offset
;
fix
+=
stride
)
{
kps
::
ReadData
<
T
,
kCount
,
1
,
1
,
false
>
(
&
dst_mask
[
0
],
src
+
fix
,
deal_size
);
kps
::
ElementwiseRandom
<
SType
,
float
,
kCount
,
1
,
Rand
>
(
&
rands
[
0
],
Rand
(),
&
state
);
// dst
kps
::
OperatorTernary
<
T
,
float
,
T
,
DstMaskGenerator
<
T
,
float
>>
(
&
dst_mask
[
0
],
&
dst_mask
[
0
],
&
rands
[
0
],
dst_functor
,
kCount
);
kps
::
WriteData
<
T
,
kCount
,
1
,
1
,
false
>
(
dst
+
fix
,
&
dst_mask
[
0
],
deal_size
);
// mask
kps
::
ElementwiseUnary
<
T
,
MaskType
,
kCount
,
1
,
1
,
Cast
>
(
&
mask_result
[
0
],
&
dst_mask
[
kCount
],
Cast
());
kps
::
WriteData
<
MaskType
,
kCount
,
1
,
1
,
false
>
(
mask
+
fix
,
&
mask_result
[
0
],
deal_size
);
}
int
remainder
=
n
-
fix
;
if
(
remainder
>
0
)
{
kps
::
ReadData
<
T
,
kCount
,
1
,
1
,
true
>
(
&
dst_mask
[
0
],
src
+
fix
,
remainder
);
kps
::
ElementwiseRandom
<
SType
,
float
,
kCount
,
1
,
Rand
>
(
&
rands
[
0
],
Rand
(),
&
state
);
// dst
kps
::
OperatorTernary
<
T
,
float
,
T
,
DstMaskGenerator
<
T
,
float
>>
(
&
dst_mask
[
0
],
&
dst_mask
[
0
],
&
rands
[
0
],
dst_functor
,
kCount
);
kps
::
WriteData
<
T
,
kCount
,
1
,
1
,
true
>
(
dst
+
fix
,
&
dst_mask
[
0
],
remainder
);
// mask
kps
::
ElementwiseUnary
<
T
,
MaskType
,
kCount
,
1
,
1
,
Cast
>
(
&
mask_result
[
0
],
&
dst_mask
[
kCount
],
Cast
());
kps
::
WriteData
<
MaskType
,
kCount
,
1
,
1
,
true
>
(
mask
+
fix
,
&
mask_result
[
0
],
remainder
);
}
}
}
}
template
<
typename
T
>
template
<
typename
T
>
...
@@ -251,13 +182,11 @@ void DropoutFwGPUKernelDriver(const phi::GPUContext& dev_ctx, bool is_test,
...
@@ -251,13 +182,11 @@ void DropoutFwGPUKernelDriver(const phi::GPUContext& dev_ctx, bool is_test,
size_t
grid_size
=
gpu_config
.
GetGridSize
();
size_t
grid_size
=
gpu_config
.
GetGridSize
();
size_t
block_size
=
gpu_config
.
GetBlockSize
();
size_t
block_size
=
gpu_config
.
GetBlockSize
();
if
(
FLAGS_use_curand
)
{
int64_t
device_id
=
dev_ctx
.
GetPlace
().
GetDeviceId
();
int64_t
device_id
=
dev_ctx
.
GetPlace
().
GetDeviceId
();
const
auto
&
prop
=
platform
::
GetDeviceProperties
(
device_id
);
const
auto
&
prop
=
platform
::
GetDeviceProperties
(
device_id
);
size_t
max_grid_size
=
prop
.
maxThreadsPerMultiProcessor
*
size_t
max_grid_size
=
prop
.
maxThreadsPerMultiProcessor
*
prop
.
multiProcessorCount
/
block_size
;
prop
.
multiProcessorCount
/
block_size
;
grid_size
=
std
::
min
(
grid_size
,
max_grid_size
);
grid_size
=
std
::
min
(
grid_size
,
max_grid_size
);
}
auto
offset
=
auto
offset
=
((
x_numel
-
1
)
/
(
grid_size
*
block_size
*
kVecSize
)
+
1
)
*
kVecSize
;
((
x_numel
-
1
)
/
(
grid_size
*
block_size
*
kVecSize
)
+
1
)
*
kVecSize
;
...
@@ -268,7 +197,7 @@ void DropoutFwGPUKernelDriver(const phi::GPUContext& dev_ctx, bool is_test,
...
@@ -268,7 +197,7 @@ void DropoutFwGPUKernelDriver(const phi::GPUContext& dev_ctx, bool is_test,
VectorizedRandomGenerator
<
T
,
uint8_t
><<<
grid_size
,
block_size
,
0
,
stream
>>>
(
VectorizedRandomGenerator
<
T
,
uint8_t
><<<
grid_size
,
block_size
,
0
,
stream
>>>
(
size
,
seed_data
,
dropout_prob
,
x_data
,
mask_data
,
y_data
,
size
,
seed_data
,
dropout_prob
,
x_data
,
mask_data
,
y_data
,
upscale_in_train
,
increment
,
main_offset
,
FLAGS_use_curand
);
upscale_in_train
,
increment
,
main_offset
);
}
else
{
}
else
{
if
(
upscale_in_train
)
{
if
(
upscale_in_train
)
{
// todo: can y share with data with x directly?
// todo: can y share with data with x directly?
...
...
paddle/fluid/operators/gaussian_random_op.cu
浏览文件 @
9714878c
...
@@ -11,21 +11,14 @@ distributed under the License is distributed on an "AS IS" BASIS,
...
@@ -11,21 +11,14 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/random.h>
#include <thrust/random.h>
#include <thrust/transform.h>
#include "paddle/fluid/framework/generator.h"
#include "paddle/fluid/framework/generator.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/amp/fp16_type_traits.h"
#include "paddle/fluid/operators/amp/fp16_type_traits.h"
#include "paddle/fluid/operators/fill_constant_op.h"
#include "paddle/fluid/operators/fill_constant_op.h"
#include "paddle/phi/kernels/funcs/distribution_helper.h"
#include "paddle/phi/kernels/funcs/index_impl.cu.h"
#include "paddle/phi/kernels/funcs/index_impl.cu.h"
DECLARE_bool
(
use_curand
);
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
...
paddle/fluid/operators/uniform_random_op.h
浏览文件 @
9714878c
...
@@ -19,11 +19,7 @@
...
@@ -19,11 +19,7 @@
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/operator.h"
#if defined(__NVCC__) || defined(__HIPCC__)
#if defined(__NVCC__) || defined(__HIPCC__)
DECLARE_bool
(
use_curand
);
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/random.h>
#include <thrust/random.h>
#include <thrust/transform.h>
#include "paddle/fluid/framework/generator.h"
#include "paddle/fluid/framework/generator.h"
#include "paddle/phi/kernels/full_kernel.h"
#include "paddle/phi/kernels/full_kernel.h"
#include "paddle/phi/kernels/funcs/distribution_helper.h"
#include "paddle/phi/kernels/funcs/distribution_helper.h"
...
@@ -146,39 +142,6 @@ struct UniformGenerator {
...
@@ -146,39 +142,6 @@ struct UniformGenerator {
}
}
};
};
template
<
typename
T
>
struct
UniformGeneratorOffset
{
T
min_
,
max_
;
unsigned
int
seed_
;
T
diag_val_
;
unsigned
int
diag_num_
;
unsigned
int
diag_step_
;
int
offset_
;
__host__
__device__
UniformGeneratorOffset
(
T
min
,
T
max
,
int
seed
,
int
diag_num
,
int
diag_step
,
T
diag_val
,
int
offset
)
:
min_
(
min
),
max_
(
max
),
seed_
(
seed
),
diag_num_
(
diag_num
),
diag_step_
(
diag_step
),
diag_val_
(
diag_val
),
offset_
(
offset
)
{}
__host__
__device__
T
operator
()(
const
unsigned
int
n
)
const
{
thrust
::
minstd_rand
rng
;
rng
.
seed
(
seed_
);
thrust
::
uniform_real_distribution
<
T
>
dist
(
min_
,
max_
);
rng
.
discard
(
n
+
offset_
);
T
out
=
dist
(
rng
);
unsigned
int
remainder
=
n
%
(
diag_step_
+
1
);
if
(
remainder
==
0
&&
diag_num_
>
n
/
(
diag_step_
+
1
))
{
out
=
diag_val_
;
}
return
out
;
}
};
template
<
typename
T
>
template
<
typename
T
>
void
UniformRandom
(
const
framework
::
ExecutionContext
&
context
,
void
UniformRandom
(
const
framework
::
ExecutionContext
&
context
,
framework
::
Tensor
*
tensor
)
{
framework
::
Tensor
*
tensor
)
{
...
@@ -205,19 +168,10 @@ void UniformRandom(const framework::ExecutionContext& context,
...
@@ -205,19 +168,10 @@ void UniformRandom(const framework::ExecutionContext& context,
int
device_id
=
context
.
GetPlace
().
GetDeviceId
();
int
device_id
=
context
.
GetPlace
().
GetDeviceId
();
auto
gen_cuda
=
framework
::
GetDefaultCUDAGenerator
(
device_id
);
auto
gen_cuda
=
framework
::
GetDefaultCUDAGenerator
(
device_id
);
if
(
gen_cuda
->
GetIsInitPy
()
&&
seed_flag
)
{
if
(
gen_cuda
->
GetIsInitPy
()
&&
seed_flag
)
{
if
(
FLAGS_use_curand
)
{
using
MT
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
using
MT
=
typename
details
::
MPTypeTrait
<
T
>::
Type
;
phi
::
funcs
::
uniform_distribution
<
MT
>
dist
;
phi
::
funcs
::
uniform_distribution
<
MT
>
dist
;
phi
::
funcs
::
uniform_real_transform
<
MT
>
trans
(
min
,
max
);
phi
::
funcs
::
uniform_real_transform
<
MT
>
trans
(
min
,
max
);
phi
::
funcs
::
distribution_and_transform
<
T
>
(
dev_cxt
,
tensor
,
dist
,
trans
);
phi
::
funcs
::
distribution_and_transform
<
T
>
(
dev_cxt
,
tensor
,
dist
,
trans
);
}
else
{
auto
seed_offset
=
gen_cuda
->
IncrementOffset
(
1
);
int64_t
gen_offset
=
size
*
seed_offset
.
second
;
auto
func
=
UniformGeneratorOffset
<
T
>
(
min
,
max
,
seed_offset
.
first
,
diag_num
,
diag_step
,
diag_val
,
gen_offset
);
phi
::
IndexKernel
<
T
,
UniformGeneratorOffset
<
T
>>
(
dev_cxt
,
tensor
,
func
);
}
}
else
{
}
else
{
auto
func
=
auto
func
=
UniformGenerator
<
T
>
(
min
,
max
,
seed
,
diag_num
,
diag_step
,
diag_val
);
UniformGenerator
<
T
>
(
min
,
max
,
seed
,
diag_num
,
diag_step
,
diag_val
);
...
...
paddle/fluid/platform/flags.cc
浏览文件 @
9714878c
...
@@ -545,8 +545,6 @@ PADDLE_DEFINE_EXPORTED_double(
...
@@ -545,8 +545,6 @@ PADDLE_DEFINE_EXPORTED_double(
*/
*/
PADDLE_DEFINE_EXPORTED_bool
(
use_mkldnn
,
false
,
"Use MKLDNN to run"
);
PADDLE_DEFINE_EXPORTED_bool
(
use_mkldnn
,
false
,
"Use MKLDNN to run"
);
PADDLE_DEFINE_EXPORTED_bool
(
use_curand
,
false
,
"Random OP use CURAND"
);
/**
/**
* Debug related FLAG
* Debug related FLAG
* Name: FLAGS_call_stack_level
* Name: FLAGS_call_stack_level
...
...
paddle/phi/kernels/cpu/transpose_kernel.cc
浏览文件 @
9714878c
...
@@ -75,6 +75,7 @@ PD_REGISTER_KERNEL(transpose,
...
@@ -75,6 +75,7 @@ PD_REGISTER_KERNEL(transpose,
double
,
double
,
int32_t
,
int32_t
,
int64_t
,
int64_t
,
phi
::
dtype
::
float16
,
phi
::
dtype
::
bfloat16
,
phi
::
dtype
::
bfloat16
,
phi
::
dtype
::
complex
<
float
>
,
phi
::
dtype
::
complex
<
float
>
,
phi
::
dtype
::
complex
<
double
>
)
{}
phi
::
dtype
::
complex
<
double
>
)
{}
paddle/phi/kernels/gpu/bernoulli_kernel.cu
浏览文件 @
9714878c
...
@@ -14,8 +14,6 @@
...
@@ -14,8 +14,6 @@
#include "paddle/phi/kernels/bernoulli_kernel.h"
#include "paddle/phi/kernels/bernoulli_kernel.h"
#include <thrust/random.h>
#include <thrust/transform.h>
#ifdef __NVCC__
#ifdef __NVCC__
#include <curand_kernel.h>
#include <curand_kernel.h>
#endif
#endif
...
@@ -32,35 +30,8 @@
...
@@ -32,35 +30,8 @@
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/distribution_helper.h"
#include "paddle/phi/kernels/funcs/distribution_helper.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/transform.h"
DECLARE_bool
(
use_curand
);
namespace
phi
{
namespace
phi
{
template
<
typename
T
>
struct
BernoulliCudaFunctor
{
unsigned
int
seed_
;
unsigned
int
offset_
;
__host__
__device__
BernoulliCudaFunctor
(
unsigned
int
seed
,
unsigned
int
offset
)
:
seed_
(
seed
),
offset_
(
offset
)
{}
__host__
__device__
T
operator
()(
const
unsigned
int
n
,
const
T
p
)
const
{
// NOTE(zhiqiu): currently, PADDLE_ENFORCE in cuda kernel may print several
// lines of error messages if, and it should be refined.
PADDLE_ENFORCE
(
p
>=
0.0
&&
p
<=
1.0
,
"The probability should be >=0 and <= 1, but got %f"
,
p
);
thrust
::
minstd_rand
rng
;
rng
.
seed
(
seed_
);
thrust
::
uniform_real_distribution
<
T
>
dist
(
0.0
,
1.0
);
rng
.
discard
(
n
+
offset_
);
return
static_cast
<
T
>
(
dist
(
rng
)
<
p
);
}
};
// 'curand_uniform4/hiprand_uniform4' generate 4 random number each time
// 'curand_uniform4/hiprand_uniform4' generate 4 random number each time
template
<
typename
T
>
template
<
typename
T
>
__global__
void
bernoulli_cuda_kernel
(
__global__
void
bernoulli_cuda_kernel
(
...
@@ -100,7 +71,6 @@ void BernoulliKernel(const Context& ctx,
...
@@ -100,7 +71,6 @@ void BernoulliKernel(const Context& ctx,
auto
gen_cuda
=
ctx
.
GetGenerator
();
auto
gen_cuda
=
ctx
.
GetGenerator
();
if
(
FLAGS_use_curand
)
{
auto
seed_offset
=
gen_cuda
->
IncrementOffset
(
12
);
auto
seed_offset
=
gen_cuda
->
IncrementOffset
(
12
);
uint64_t
seed
=
seed_offset
.
first
;
uint64_t
seed
=
seed_offset
.
first
;
uint64_t
offset
=
seed_offset
.
second
;
uint64_t
offset
=
seed_offset
.
second
;
...
@@ -111,19 +81,6 @@ void BernoulliKernel(const Context& ctx,
...
@@ -111,19 +81,6 @@ void BernoulliKernel(const Context& ctx,
bernoulli_cuda_kernel
<<<
grid_size
,
block_size
,
0
,
ctx
.
stream
()
>>>
(
bernoulli_cuda_kernel
<<<
grid_size
,
block_size
,
0
,
ctx
.
stream
()
>>>
(
numel
,
seed
,
offset
,
x_data
,
out_data
);
numel
,
seed
,
offset
,
x_data
,
out_data
);
}
else
{
auto
seed_offset
=
gen_cuda
->
IncrementOffset
(
1
);
int64_t
gen_offset
=
numel
*
seed_offset
.
second
;
paddle
::
platform
::
Transform
<
phi
::
GPUContext
>
trans
;
thrust
::
counting_iterator
<
int64_t
>
index_sequence_begin
(
0
);
trans
(
ctx
,
index_sequence_begin
,
index_sequence_begin
+
numel
,
x_data
,
out_data
,
BernoulliCudaFunctor
<
T
>
(
static_cast
<
int64_t
>
(
seed_offset
.
first
),
static_cast
<
int64_t
>
(
gen_offset
)));
}
}
}
}
// namespace phi
}
// namespace phi
...
...
paddle/phi/kernels/gpu/gaussian_random_kernel.cu
浏览文件 @
9714878c
...
@@ -14,10 +14,7 @@
...
@@ -14,10 +14,7 @@
#include "paddle/phi/kernels/gaussian_random_kernel.h"
#include "paddle/phi/kernels/gaussian_random_kernel.h"
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/random.h>
#include <thrust/random.h>
#include <thrust/transform.h>
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/dense_tensor.h"
...
@@ -27,8 +24,6 @@
...
@@ -27,8 +24,6 @@
#include "paddle/fluid/framework/generator.h"
#include "paddle/fluid/framework/generator.h"
DECLARE_bool
(
use_curand
);
namespace
phi
{
namespace
phi
{
template
<
typename
T
>
template
<
typename
T
>
...
@@ -83,21 +78,11 @@ void GaussianRandomKernel(const Context& dev_ctx,
...
@@ -83,21 +78,11 @@ void GaussianRandomKernel(const Context& dev_ctx,
auto
gen_cuda
=
paddle
::
framework
::
GetDefaultCUDAGenerator
(
device_id
);
auto
gen_cuda
=
paddle
::
framework
::
GetDefaultCUDAGenerator
(
device_id
);
if
(
gen_cuda
->
GetIsInitPy
()
&&
seed_flag
)
{
if
(
gen_cuda
->
GetIsInitPy
()
&&
seed_flag
)
{
if
(
FLAGS_use_curand
)
{
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
funcs
::
normal_distribution
<
MT
>
dist
;
funcs
::
normal_distribution
<
MT
>
dist
;
funcs
::
normal_transform
<
MT
>
trans
(
static_cast
<
MT
>
(
mean
),
funcs
::
normal_transform
<
MT
>
trans
(
static_cast
<
MT
>
(
mean
),
static_cast
<
MT
>
(
std
));
static_cast
<
MT
>
(
std
));
funcs
::
distribution_and_transform
<
T
>
(
dev_ctx
,
tensor
,
dist
,
trans
);
funcs
::
distribution_and_transform
<
T
>
(
dev_ctx
,
tensor
,
dist
,
trans
);
}
else
{
auto
seed_offset
=
gen_cuda
->
IncrementOffset
(
1
);
int64_t
gen_offset
=
size
*
seed_offset
.
second
;
auto
func
=
GaussianGenerator
<
T
>
(
static_cast
<
T
>
(
mean
),
static_cast
<
T
>
(
std
),
seed_offset
.
first
,
gen_offset
);
IndexKernel
<
T
,
GaussianGenerator
<
T
>>
(
dev_ctx
,
tensor
,
func
);
}
}
else
{
}
else
{
auto
func
=
auto
func
=
GaussianGenerator
<
T
>
(
static_cast
<
T
>
(
mean
),
static_cast
<
T
>
(
std
),
seed
);
GaussianGenerator
<
T
>
(
static_cast
<
T
>
(
mean
),
static_cast
<
T
>
(
std
),
seed
);
...
...
paddle/phi/kernels/gpu/multinomial_kernel.cu
浏览文件 @
9714878c
...
@@ -18,11 +18,6 @@ limitations under the License. */
...
@@ -18,11 +18,6 @@ limitations under the License. */
#include "paddle/phi/kernels/multinomial_kernel.h"
#include "paddle/phi/kernels/multinomial_kernel.h"
#include <thrust/execution_policy.h>
#include <thrust/random.h>
#include <thrust/scan.h>
#include <thrust/transform.h>
#ifdef __NVCC__
#ifdef __NVCC__
#include "cub/cub.cuh"
#include "cub/cub.cuh"
#endif
#endif
...
@@ -44,12 +39,6 @@ namespace cub = hipcub;
...
@@ -44,12 +39,6 @@ namespace cub = hipcub;
#include "paddle/phi/kernels/funcs/multinomial_functor.h"
#include "paddle/phi/kernels/funcs/multinomial_functor.h"
#include "paddle/phi/kernels/top_k_kernel.h"
#include "paddle/phi/kernels/top_k_kernel.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/transform.h"
DECLARE_bool
(
use_curand
);
namespace
phi
{
namespace
phi
{
template
<
typename
T
>
template
<
typename
T
>
...
@@ -74,32 +63,6 @@ __global__ void NormalizeProbability(T* norm_probs,
...
@@ -74,32 +63,6 @@ __global__ void NormalizeProbability(T* norm_probs,
}
}
}
}
template
<
typename
T
>
__global__
void
GetCumulativeProbs
(
T
*
norm_probs_data
,
int64_t
num_distributions
,
int64_t
num_categories
,
T
*
cumulative_probs_data
)
{
int
id
=
blockIdx
.
x
;
thrust
::
inclusive_scan
(
thrust
::
device
,
norm_probs_data
+
id
*
num_categories
,
norm_probs_data
+
(
id
+
1
)
*
num_categories
,
cumulative_probs_data
+
id
*
num_categories
);
}
template
<
typename
T
>
struct
RandomGeneratorCudaFunctor
{
unsigned
int
seed_
;
__host__
__device__
RandomGeneratorCudaFunctor
(
int
seed
)
:
seed_
(
seed
)
{}
__host__
__device__
T
operator
()(
const
unsigned
int
n
)
const
{
thrust
::
minstd_rand
rng
;
rng
.
seed
(
seed_
);
thrust
::
uniform_real_distribution
<
T
>
dist
(
0.0
,
1.0
);
rng
.
discard
(
n
);
return
dist
(
rng
);
}
};
template
<
typename
T
>
template
<
typename
T
>
__device__
int
binarySearchFunctor
(
T
*
cumulative_probs_data
,
__device__
int
binarySearchFunctor
(
T
*
cumulative_probs_data
,
T
*
norm_probs_data
,
T
*
norm_probs_data
,
...
@@ -130,7 +93,6 @@ __device__ int binarySearchFunctor(T* cumulative_probs_data,
...
@@ -130,7 +93,6 @@ __device__ int binarySearchFunctor(T* cumulative_probs_data,
template
<
typename
T
>
template
<
typename
T
>
__global__
void
sampleMultinomialWithReplacement
(
__global__
void
sampleMultinomialWithReplacement
(
T
*
rng_data
,
const
int64_t
num_samples
,
const
int64_t
num_samples
,
int64_t
*
out_data
,
int64_t
*
out_data
,
const
int64_t
num_distributions
,
const
int64_t
num_distributions
,
...
@@ -138,10 +100,9 @@ __global__ void sampleMultinomialWithReplacement(
...
@@ -138,10 +100,9 @@ __global__ void sampleMultinomialWithReplacement(
T
*
cumulative_probs_data
,
T
*
cumulative_probs_data
,
T
*
norm_probs_data
,
T
*
norm_probs_data
,
uint64_t
seed
,
uint64_t
seed
,
uint64_t
offset
,
uint64_t
offset
)
{
bool
use_curand
)
{
// use binary search to get the selected category sample id.
// use binary search to get the selected category sample id.
// let cumulative_probs_data[id-1] < rng_
data
< cumulative_probs_data[id].
// let cumulative_probs_data[id-1] < rng_
number
< cumulative_probs_data[id].
size_t
idx
=
gridDim
.
x
*
blockDim
.
x
*
blockIdx
.
y
+
blockDim
.
x
*
blockIdx
.
x
+
size_t
idx
=
gridDim
.
x
*
blockDim
.
x
*
blockIdx
.
y
+
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
threadIdx
.
x
;
...
@@ -151,10 +112,7 @@ __global__ void sampleMultinomialWithReplacement(
...
@@ -151,10 +112,7 @@ __global__ void sampleMultinomialWithReplacement(
int
sample
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
sample
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
for
(
int
dist
=
blockIdx
.
y
;
dist
<
num_distributions
;
dist
+=
gridDim
.
y
)
{
for
(
int
dist
=
blockIdx
.
y
;
dist
<
num_distributions
;
dist
+=
gridDim
.
y
)
{
if
(
sample
<
num_samples
)
{
if
(
sample
<
num_samples
)
{
T
rng_number
=
rng_data
[
sample
+
dist
*
num_samples
];
T
rng_number
=
static_cast
<
T
>
(
curand_uniform4
(
&
state
).
x
);
if
(
use_curand
)
{
rng_number
=
static_cast
<
T
>
(
curand_uniform4
(
&
state
).
x
);
}
// Find the bucket that a uniform random number lies in
// Find the bucket that a uniform random number lies in
int
selected_category
=
int
selected_category
=
binarySearchFunctor
<
T
>
(
cumulative_probs_data
+
dist
*
num_categories
,
binarySearchFunctor
<
T
>
(
cumulative_probs_data
+
dist
*
num_categories
,
...
@@ -182,10 +140,7 @@ void MultinomialKernel(const Context& dev_ctx,
...
@@ -182,10 +140,7 @@ void MultinomialKernel(const Context& dev_ctx,
const
int64_t
num_distributions
=
in_rank
>
1
?
in_dims
[
in_rank
-
2
]
:
1
;
const
int64_t
num_distributions
=
in_rank
>
1
?
in_dims
[
in_rank
-
2
]
:
1
;
// If replacement is False, it's not a replaceable sample. Every category
// If replacement is False, it's not a replaceable sample. Every category
// can
// can be used only once.
// be used only once. So after every sample, probability of the distribution
// will change. The implementation can't be parallelizable. Thus, call CPU
// implementation ``funcs::MultinomialFunctor`` to sample the distribution.
if
(
!
replacement
)
{
if
(
!
replacement
)
{
int64_t
in_data_numel
=
x
.
numel
();
int64_t
in_data_numel
=
x
.
numel
();
int64_t
out_data_numel
=
out
->
numel
();
int64_t
out_data_numel
=
out
->
numel
();
...
@@ -202,7 +157,6 @@ void MultinomialKernel(const Context& dev_ctx,
...
@@ -202,7 +157,6 @@ void MultinomialKernel(const Context& dev_ctx,
in_data_numel
*
sizeof
(
T
),
in_data_numel
*
sizeof
(
T
),
cudaMemcpyDeviceToHost
);
cudaMemcpyDeviceToHost
);
#endif
#endif
if
(
FLAGS_use_curand
)
{
for
(
size_t
i
=
0
;
i
<
num_distributions
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
num_distributions
;
++
i
)
{
int
zero_num
=
0
;
int
zero_num
=
0
;
for
(
size_t
j
=
0
;
j
<
num_categories
;
++
j
)
{
for
(
size_t
j
=
0
;
j
<
num_categories
;
++
j
)
{
...
@@ -250,31 +204,6 @@ void MultinomialKernel(const Context& dev_ctx,
...
@@ -250,31 +204,6 @@ void MultinomialKernel(const Context& dev_ctx,
return
;
return
;
}
}
funcs
::
MultinomialFunctor
<
T
>
(
dev_ctx
,
cpu_out_data
,
cpu_in_data
,
num_samples
,
replacement
,
num_categories
,
num_distributions
);
#ifdef PADDLE_WITH_HIP
hipMemcpy
(
out_data
,
cpu_out_data
,
out_data_numel
*
sizeof
(
int64_t
),
hipMemcpyHostToDevice
);
#else
cudaMemcpy
(
out_data
,
cpu_out_data
,
out_data_numel
*
sizeof
(
int64_t
),
cudaMemcpyHostToDevice
);
#endif
delete
[]
cpu_in_data
;
delete
[]
cpu_out_data
;
return
;
}
// Sum of input may not be 1. To get probability in range [0, 1], calculate
// Sum of input may not be 1. To get probability in range [0, 1], calculate
// sum of each row of input, and then use the sum to normalize the input.
// sum of each row of input, and then use the sum to normalize the input.
// sum_row_data: sum of each row
// sum_row_data: sum of each row
...
@@ -322,7 +251,6 @@ void MultinomialKernel(const Context& dev_ctx,
...
@@ -322,7 +251,6 @@ void MultinomialKernel(const Context& dev_ctx,
auto
*
cumulative_probs_data
=
auto
*
cumulative_probs_data
=
dev_ctx
.
template
Alloc
<
T
>(
&
cumulative_probs_tensor
);
dev_ctx
.
template
Alloc
<
T
>(
&
cumulative_probs_tensor
);
if
(
FLAGS_use_curand
)
{
// 'phi::funcs::InclusiveScan' has higher accuracy than
// 'phi::funcs::InclusiveScan' has higher accuracy than
// 'thrust::inclusive_scan'
// 'thrust::inclusive_scan'
funcs
::
InclusiveScan
<
T
,
std
::
plus
<
T
>>
(
funcs
::
InclusiveScan
<
T
,
std
::
plus
<
T
>>
(
...
@@ -335,31 +263,6 @@ void MultinomialKernel(const Context& dev_ctx,
...
@@ -335,31 +263,6 @@ void MultinomialKernel(const Context& dev_ctx,
std
::
plus
<
T
>
(),
std
::
plus
<
T
>
(),
/*reverse=*/
false
,
/*reverse=*/
false
,
dev_ctx
);
dev_ctx
);
}
else
{
dim3
block_cumsum
(
1
);
dim3
grid_cumsum
(
num_distributions
);
GetCumulativeProbs
<
T
><<<
grid_cumsum
,
block_cumsum
,
0
,
dev_ctx
.
stream
()
>>>
(
norm_probs_data
,
num_distributions
,
num_categories
,
cumulative_probs_data
);
}
// Generate random number for each sample.
std
::
random_device
rd
;
auto
seed
=
rd
();
DenseTensor
rng_data_tensor
;
rng_data_tensor
.
Resize
({
num_distributions
,
num_samples
});
auto
*
rng_data
=
dev_ctx
.
template
Alloc
<
T
>(
&
rng_data_tensor
);
thrust
::
counting_iterator
<
int64_t
>
index_sequence_begin
(
0
);
paddle
::
platform
::
Transform
<
GPUContext
>
trans
;
trans
(
dev_ctx
,
index_sequence_begin
,
index_sequence_begin
+
num_distributions
*
num_samples
,
rng_data
,
RandomGeneratorCudaFunctor
<
T
>
(
seed
));
// Sample the multinomial distributions.
// Sample the multinomial distributions.
dim3
block
(
128
);
dim3
block
(
128
);
...
@@ -376,7 +279,6 @@ void MultinomialKernel(const Context& dev_ctx,
...
@@ -376,7 +279,6 @@ void MultinomialKernel(const Context& dev_ctx,
auto
seed_offset
=
gen_cuda
->
IncrementOffset
(
increment
);
auto
seed_offset
=
gen_cuda
->
IncrementOffset
(
increment
);
sampleMultinomialWithReplacement
<
T
><<<
grid
,
block
,
0
,
dev_ctx
.
stream
()
>>>
(
sampleMultinomialWithReplacement
<
T
><<<
grid
,
block
,
0
,
dev_ctx
.
stream
()
>>>
(
rng_data
,
num_samples
,
num_samples
,
out_data
,
out_data
,
num_distributions
,
num_distributions
,
...
@@ -384,8 +286,7 @@ void MultinomialKernel(const Context& dev_ctx,
...
@@ -384,8 +286,7 @@ void MultinomialKernel(const Context& dev_ctx,
cumulative_probs_data
,
cumulative_probs_data
,
norm_probs_data
,
norm_probs_data
,
seed_offset
.
first
,
seed_offset
.
first
,
seed_offset
.
second
,
seed_offset
.
second
);
FLAGS_use_curand
);
}
}
}
// namespace phi
}
// namespace phi
...
...
paddle/phi/kernels/gpu/randint_kernel.cu
浏览文件 @
9714878c
...
@@ -23,8 +23,6 @@
...
@@ -23,8 +23,6 @@
// See Note [ Why still include the fluid headers? ]
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/memory/memcpy.h"
DECLARE_bool
(
use_curand
);
namespace
phi
{
namespace
phi
{
template
<
typename
T
,
typename
Context
>
template
<
typename
T
,
typename
Context
>
...
@@ -37,37 +35,9 @@ void RandintRawKernel(const Context& dev_ctx,
...
@@ -37,37 +35,9 @@ void RandintRawKernel(const Context& dev_ctx,
DenseTensor
*
out
)
{
DenseTensor
*
out
)
{
out
->
Resize
(
phi
::
make_ddim
(
shape
.
GetData
()));
out
->
Resize
(
phi
::
make_ddim
(
shape
.
GetData
()));
T
*
data
=
dev_ctx
.
template
Alloc
<
T
>(
out
);
T
*
data
=
dev_ctx
.
template
Alloc
<
T
>(
out
);
if
(
FLAGS_use_curand
)
{
funcs
::
uniform_distribution
<
uint32_t
>
dist
;
funcs
::
uniform_distribution
<
uint32_t
>
dist
;
funcs
::
uniform_int_transform
<
T
,
uint32_t
>
trans
(
low
,
high
);
funcs
::
uniform_int_transform
<
T
,
uint32_t
>
trans
(
low
,
high
);
funcs
::
distribution_and_transform
<
T
>
(
dev_ctx
,
out
,
dist
,
trans
);
funcs
::
distribution_and_transform
<
T
>
(
dev_ctx
,
out
,
dist
,
trans
);
}
else
{
DenseTensor
tmp
;
tmp
.
Resize
(
phi
::
make_ddim
(
shape
.
GetData
()));
T
*
tmp_data
=
dev_ctx
.
template
HostAlloc
<
T
>(
&
tmp
);
std
::
shared_ptr
<
std
::
mt19937_64
>
engine
;
if
(
seed
)
{
engine
=
std
::
make_shared
<
std
::
mt19937_64
>
();
engine
->
seed
(
seed
);
}
else
{
engine
=
dev_ctx
.
GetHostGenerator
()
->
GetCPUEngine
();
}
std
::
uniform_int_distribution
<
T
>
dist
(
low
,
high
-
1
);
auto
numel
=
out
->
numel
();
for
(
int64_t
i
=
0
;
i
<
numel
;
++
i
)
{
tmp_data
[
i
]
=
dist
(
*
engine
);
}
paddle
::
memory
::
Copy
<
phi
::
GPUPlace
,
phi
::
Place
>
(
out
->
place
(),
data
,
tmp
.
place
(),
tmp_data
,
numel
*
paddle
::
experimental
::
SizeOf
(
out
->
dtype
()),
0
);
}
}
}
template
<
typename
T
,
typename
Context
>
template
<
typename
T
,
typename
Context
>
...
...
paddle/phi/kernels/gpu/randperm_kernel.cu
浏览文件 @
9714878c
...
@@ -84,7 +84,6 @@ __global__ void SwapRepeatKernel(
...
@@ -84,7 +84,6 @@ __global__ void SwapRepeatKernel(
template
<
typename
T
,
typename
Context
>
template
<
typename
T
,
typename
Context
>
void
RandpermRawKernel
(
void
RandpermRawKernel
(
const
Context
&
dev_ctx
,
int
n
,
DataType
dtype
,
int
seed
,
DenseTensor
*
out
)
{
const
Context
&
dev_ctx
,
int
n
,
DataType
dtype
,
int
seed
,
DenseTensor
*
out
)
{
if
(
FLAGS_use_curand
)
{
DenseTensor
key
;
DenseTensor
key
;
RandintKernel
<
int
,
Context
>
(
dev_ctx
,
RandintKernel
<
int
,
Context
>
(
dev_ctx
,
std
::
numeric_limits
<
int
>::
min
(),
std
::
numeric_limits
<
int
>::
min
(),
...
@@ -137,38 +136,13 @@ void RandpermRawKernel(
...
@@ -137,38 +136,13 @@ void RandpermRawKernel(
auto
gen_cuda
=
dev_ctx
.
GetGenerator
();
auto
gen_cuda
=
dev_ctx
.
GetGenerator
();
auto
seed_offset
=
gen_cuda
->
IncrementOffset
(
n
);
auto
seed_offset
=
gen_cuda
->
IncrementOffset
(
n
);
uint64_t
seed
=
seed_offset
.
first
;
uint64_t
offset
=
seed_offset
.
second
;
auto
config
=
phi
::
backends
::
gpu
::
GetGpuLaunchConfig1D
(
dev_ctx
,
n
);
auto
config
=
phi
::
backends
::
gpu
::
GetGpuLaunchConfig1D
(
dev_ctx
,
n
);
SwapRepeatKernel
<
T
><<<
config
.
block_per_grid
.
x
,
SwapRepeatKernel
<
T
><<<
config
.
block_per_grid
.
x
,
config
.
thread_per_block
.
x
,
config
.
thread_per_block
.
x
,
0
,
0
,
dev_ctx
.
stream
()
>>>
(
dev_ctx
.
stream
()
>>>
(
key_out
.
data
<
int
>
(),
out_data
,
n
,
seed
,
offset
);
key_out
.
data
<
int
>
(),
out_data
,
n
,
seed_offset
.
first
,
seed_offset
.
second
);
}
else
{
DenseTensor
tmp
;
tmp
.
Resize
(
phi
::
make_ddim
({
n
}));
T
*
tmp_data
=
dev_ctx
.
template
HostAlloc
<
T
>(
&
tmp
);
std
::
shared_ptr
<
std
::
mt19937_64
>
engine
;
if
(
seed
)
{
engine
=
std
::
make_shared
<
std
::
mt19937_64
>
();
engine
->
seed
(
seed
);
}
else
{
engine
=
dev_ctx
.
GetHostGenerator
()
->
GetCPUEngine
();
}
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
tmp_data
[
i
]
=
static_cast
<
T
>
(
i
);
}
std
::
shuffle
(
tmp_data
,
tmp_data
+
n
,
*
engine
);
T
*
out_data
=
dev_ctx
.
template
Alloc
<
T
>(
out
);
auto
size
=
out
->
numel
()
*
paddle
::
experimental
::
SizeOf
(
out
->
dtype
());
paddle
::
memory
::
Copy
<
phi
::
GPUPlace
,
phi
::
Place
>
(
out
->
place
(),
out_data
,
tmp
.
place
(),
tmp_data
,
size
,
0
);
}
}
}
template
<
typename
T
,
typename
Context
>
template
<
typename
T
,
typename
Context
>
...
...
paddle/phi/kernels/gpu/uniform_random_kernel.cu
浏览文件 @
9714878c
...
@@ -14,14 +14,13 @@
...
@@ -14,14 +14,13 @@
#include "paddle/phi/kernels/uniform_random_kernel.h"
#include "paddle/phi/kernels/uniform_random_kernel.h"
#include <thrust/random.h>
#include "gflags/gflags.h"
#include "gflags/gflags.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/distribution_helper.h"
#include "paddle/phi/kernels/funcs/distribution_helper.h"
#include "paddle/phi/kernels/funcs/index_impl.cu.h"
#include "paddle/phi/kernels/funcs/index_impl.cu.h"
DECLARE_bool
(
use_curand
);
namespace
phi
{
namespace
phi
{
template
<
typename
T
>
template
<
typename
T
>
...
@@ -54,43 +53,6 @@ struct UniformGenerator {
...
@@ -54,43 +53,6 @@ struct UniformGenerator {
}
}
};
};
template
<
typename
T
>
struct
UniformGeneratorOffset
{
T
min_
,
max_
;
unsigned
int
seed_
;
T
diag_val_
;
unsigned
int
diag_num_
;
unsigned
int
diag_step_
;
int
offset_
;
__host__
__device__
UniformGeneratorOffset
(
T
min
,
T
max
,
int
seed
,
int
diag_num
,
int
diag_step
,
T
diag_val
,
int
offset
)
:
min_
(
min
),
max_
(
max
),
seed_
(
seed
),
diag_num_
(
diag_num
),
diag_step_
(
diag_step
),
diag_val_
(
diag_val
),
offset_
(
offset
)
{}
__host__
__device__
T
operator
()(
const
unsigned
int
n
)
const
{
thrust
::
minstd_rand
rng
;
rng
.
seed
(
seed_
);
thrust
::
uniform_real_distribution
<
T
>
dist
(
min_
,
max_
);
rng
.
discard
(
n
+
offset_
);
T
out
=
dist
(
rng
);
unsigned
int
remainder
=
n
%
(
diag_step_
+
1
);
if
(
remainder
==
0
&&
diag_num_
>
n
/
(
diag_step_
+
1
))
{
out
=
diag_val_
;
}
return
out
;
}
};
template
<
typename
T
,
typename
Context
>
template
<
typename
T
,
typename
Context
>
void
UniformRandomRawKernel
(
const
Context
&
dev_ctx
,
void
UniformRandomRawKernel
(
const
Context
&
dev_ctx
,
const
IntArray
&
shape
,
const
IntArray
&
shape
,
...
@@ -114,23 +76,10 @@ void UniformRandomRawKernel(const Context& dev_ctx,
...
@@ -114,23 +76,10 @@ void UniformRandomRawKernel(const Context& dev_ctx,
auto
generator
=
dev_ctx
.
GetGenerator
();
auto
generator
=
dev_ctx
.
GetGenerator
();
if
(
generator
->
GetIsInitPy
()
&&
seed_flag
)
{
if
(
generator
->
GetIsInitPy
()
&&
seed_flag
)
{
if
(
FLAGS_use_curand
)
{
using
MT
=
typename
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
using
MT
=
typename
kps
::
details
::
MPTypeTrait
<
T
>::
Type
;
funcs
::
uniform_distribution
<
MT
>
dist
;
funcs
::
uniform_distribution
<
MT
>
dist
;
funcs
::
uniform_real_transform
<
MT
>
trans
(
min
,
max
);
funcs
::
uniform_real_transform
<
MT
>
trans
(
min
,
max
);
funcs
::
distribution_and_transform
<
T
>
(
dev_ctx
,
out
,
dist
,
trans
);
funcs
::
distribution_and_transform
<
T
>
(
dev_ctx
,
out
,
dist
,
trans
);
}
else
{
auto
seed_offset
=
generator
->
IncrementOffset
(
1
);
int64_t
gen_offset
=
size
*
seed_offset
.
second
;
auto
func
=
UniformGeneratorOffset
<
T
>
(
min
,
max
,
seed_offset
.
first
,
diag_num
,
diag_step
,
diag_val
,
gen_offset
);
IndexKernel
<
T
,
UniformGeneratorOffset
<
T
>>
(
dev_ctx
,
out
,
func
);
}
}
else
{
}
else
{
auto
func
=
auto
func
=
UniformGenerator
<
T
>
(
min
,
max
,
seed
,
diag_num
,
diag_step
,
diag_val
);
UniformGenerator
<
T
>
(
min
,
max
,
seed
,
diag_num
,
diag_step
,
diag_val
);
...
...
paddle/scripts/paddle_build.bat
浏览文件 @
9714878c
...
@@ -657,7 +657,6 @@ for /F %%# in ('wmic os get localdatetime^|findstr 20') do set start=%%#
...
@@ -657,7 +657,6 @@ for /F %%# in ('wmic os get localdatetime^|findstr 20') do set start=%%#
set
start
=
%start
:
~
4
,
10
%
set
start
=
%start
:
~
4
,
10
%
set
FLAGS_call_stack_level
=
2
set
FLAGS_call_stack_level
=
2
set
FLAGS_use_curand
=
True
dir
%THIRD
_PARTY_PATH:/
=
\
%
\install\openblas\lib
dir
%THIRD
_PARTY_PATH:/
=
\
%
\install\openblas\lib
dir
%THIRD
_PARTY_PATH:/
=
\
%
\install\openblas\bin
dir
%THIRD
_PARTY_PATH:/
=
\
%
\install\openblas\bin
dir
%THIRD
_PARTY_PATH:/
=
\
%
\install\zlib\bin
dir
%THIRD
_PARTY_PATH:/
=
\
%
\install\zlib\bin
...
...
paddle/scripts/paddle_build.sh
浏览文件 @
9714878c
...
@@ -61,8 +61,6 @@ function init() {
...
@@ -61,8 +61,6 @@ function init() {
# NOTE(chenweihang): For easy debugging, CI displays the C++ error stacktrace by default
# NOTE(chenweihang): For easy debugging, CI displays the C++ error stacktrace by default
export
FLAGS_call_stack_level
=
2
export
FLAGS_call_stack_level
=
2
export
FLAGS_use_curand
=
True
# set CI_SKIP_CPP_TEST if only *.py changed
# set CI_SKIP_CPP_TEST if only *.py changed
# In order to avoid using in some CI(such as daily performance), the current
# In order to avoid using in some CI(such as daily performance), the current
# branch must not be `${BRANCH}` which is usually develop.
# branch must not be `${BRANCH}` which is usually develop.
...
...
python/paddle/fluid/initializer.py
浏览文件 @
9714878c
...
@@ -561,12 +561,12 @@ class XavierInitializer(Initializer):
...
@@ -561,12 +561,12 @@ class XavierInitializer(Initializer):
if
framework
.
_non_static_mode
():
if
framework
.
_non_static_mode
():
if
self
.
_uniform
:
if
self
.
_uniform
:
limit
=
np
.
sqrt
(
6.0
/
float
(
fan_in
+
fan_out
))
limit
=
math
.
sqrt
(
6.0
/
float
(
fan_in
+
fan_out
))
out_var
=
_C_ops
.
uniform_random
(
'shape'
,
out_var
.
shape
,
'min'
,
out_var
=
_C_ops
.
uniform_random
(
'shape'
,
out_var
.
shape
,
'min'
,
-
limit
,
'max'
,
limit
,
'seed'
,
-
limit
,
'max'
,
limit
,
'seed'
,
self
.
_seed
,
'dtype'
,
out_dtype
)
self
.
_seed
,
'dtype'
,
out_dtype
)
else
:
else
:
std
=
np
.
sqrt
(
2.0
/
float
(
fan_in
+
fan_out
))
std
=
math
.
sqrt
(
2.0
/
float
(
fan_in
+
fan_out
))
out_var
=
_C_ops
.
gaussian_random
(
out_var
=
_C_ops
.
gaussian_random
(
'shape'
,
out_var
.
shape
,
'dtype'
,
out_dtype
,
'mean'
,
0.0
,
'shape'
,
out_var
.
shape
,
'dtype'
,
out_dtype
,
'mean'
,
0.0
,
'std'
,
std
,
'seed'
,
self
.
_seed
)
'std'
,
std
,
'seed'
,
self
.
_seed
)
...
@@ -581,7 +581,7 @@ class XavierInitializer(Initializer):
...
@@ -581,7 +581,7 @@ class XavierInitializer(Initializer):
return
None
return
None
else
:
else
:
if
self
.
_uniform
:
if
self
.
_uniform
:
limit
=
np
.
sqrt
(
6.0
/
float
(
fan_in
+
fan_out
))
limit
=
math
.
sqrt
(
6.0
/
float
(
fan_in
+
fan_out
))
op
=
block
.
append_op
(
op
=
block
.
append_op
(
type
=
"uniform_random"
,
type
=
"uniform_random"
,
inputs
=
{},
inputs
=
{},
...
@@ -595,7 +595,7 @@ class XavierInitializer(Initializer):
...
@@ -595,7 +595,7 @@ class XavierInitializer(Initializer):
},
},
stop_gradient
=
True
)
stop_gradient
=
True
)
else
:
else
:
std
=
np
.
sqrt
(
2.0
/
float
(
fan_in
+
fan_out
))
std
=
math
.
sqrt
(
2.0
/
float
(
fan_in
+
fan_out
))
op
=
block
.
append_op
(
op
=
block
.
append_op
(
type
=
"gaussian_random"
,
type
=
"gaussian_random"
,
outputs
=
{
"Out"
:
out_var
},
outputs
=
{
"Out"
:
out_var
},
...
@@ -713,13 +713,13 @@ class MSRAInitializer(Initializer):
...
@@ -713,13 +713,13 @@ class MSRAInitializer(Initializer):
if
framework
.
_non_static_mode
():
if
framework
.
_non_static_mode
():
if
self
.
_uniform
:
if
self
.
_uniform
:
limit
=
np
.
sqrt
(
6.0
/
float
(
fan_in
))
limit
=
math
.
sqrt
(
6.0
/
float
(
fan_in
))
out_var
=
_C_ops
.
uniform_random
(
'shape'
,
out_var
.
shape
,
'min'
,
out_var
=
_C_ops
.
uniform_random
(
'shape'
,
out_var
.
shape
,
'min'
,
-
limit
,
'max'
,
limit
,
'seed'
,
-
limit
,
'max'
,
limit
,
'seed'
,
self
.
_seed
,
'dtype'
,
self
.
_seed
,
'dtype'
,
int
(
out_dtype
))
int
(
out_dtype
))
else
:
else
:
std
=
np
.
sqrt
(
2.0
/
float
(
fan_in
))
std
=
math
.
sqrt
(
2.0
/
float
(
fan_in
))
out_var
=
_C_ops
.
gaussian_random
(
out_var
=
_C_ops
.
gaussian_random
(
'shape'
,
out_var
.
shape
,
'dtype'
,
'shape'
,
out_var
.
shape
,
'dtype'
,
int
(
out_dtype
),
'mean'
,
0.0
,
'std'
,
std
,
'seed'
,
self
.
_seed
)
int
(
out_dtype
),
'mean'
,
0.0
,
'std'
,
std
,
'seed'
,
self
.
_seed
)
...
@@ -734,7 +734,7 @@ class MSRAInitializer(Initializer):
...
@@ -734,7 +734,7 @@ class MSRAInitializer(Initializer):
return
None
return
None
else
:
else
:
if
self
.
_uniform
:
if
self
.
_uniform
:
limit
=
np
.
sqrt
(
6.0
/
float
(
fan_in
))
limit
=
math
.
sqrt
(
6.0
/
float
(
fan_in
))
op
=
block
.
append_op
(
op
=
block
.
append_op
(
type
=
"uniform_random"
,
type
=
"uniform_random"
,
inputs
=
{},
inputs
=
{},
...
@@ -749,7 +749,7 @@ class MSRAInitializer(Initializer):
...
@@ -749,7 +749,7 @@ class MSRAInitializer(Initializer):
stop_gradient
=
True
)
stop_gradient
=
True
)
else
:
else
:
std
=
np
.
sqrt
(
2.0
/
float
(
fan_in
))
std
=
math
.
sqrt
(
2.0
/
float
(
fan_in
))
op
=
block
.
append_op
(
op
=
block
.
append_op
(
type
=
"gaussian_random"
,
type
=
"gaussian_random"
,
outputs
=
{
"Out"
:
out_var
},
outputs
=
{
"Out"
:
out_var
},
...
...
python/paddle/fluid/tests/unittests/test_bernoulli_op.py
浏览文件 @
9714878c
...
@@ -75,9 +75,6 @@ class TestRandomValue(unittest.TestCase):
...
@@ -75,9 +75,6 @@ class TestRandomValue(unittest.TestCase):
if
not
paddle
.
is_compiled_with_cuda
():
if
not
paddle
.
is_compiled_with_cuda
():
return
return
if
os
.
getenv
(
"FLAGS_use_curand"
,
None
)
in
(
'0'
,
'False'
,
None
):
return
print
(
"Test Fixed Random number on GPU------>"
)
print
(
"Test Fixed Random number on GPU------>"
)
paddle
.
disable_static
()
paddle
.
disable_static
()
paddle
.
set_device
(
'gpu'
)
paddle
.
set_device
(
'gpu'
)
...
...
python/paddle/fluid/tests/unittests/test_dropout_op.py
浏览文件 @
9714878c
...
@@ -1034,9 +1034,6 @@ class TestRandomValue(unittest.TestCase):
...
@@ -1034,9 +1034,6 @@ class TestRandomValue(unittest.TestCase):
if
not
"V100"
in
paddle
.
device
.
cuda
.
get_device_name
():
if
not
"V100"
in
paddle
.
device
.
cuda
.
get_device_name
():
return
return
if
os
.
getenv
(
"FLAGS_use_curand"
,
None
)
in
(
'0'
,
'False'
,
None
):
return
print
(
"Test Fixed Random number on V100 GPU------>"
)
print
(
"Test Fixed Random number on V100 GPU------>"
)
paddle
.
disable_static
()
paddle
.
disable_static
()
paddle
.
set_device
(
'gpu'
)
paddle
.
set_device
(
'gpu'
)
...
...
python/paddle/fluid/tests/unittests/test_exponential_op.py
浏览文件 @
9714878c
...
@@ -100,9 +100,6 @@ class TestExponentialAPI(unittest.TestCase):
...
@@ -100,9 +100,6 @@ class TestExponentialAPI(unittest.TestCase):
if
not
"V100"
in
paddle
.
device
.
cuda
.
get_device_name
():
if
not
"V100"
in
paddle
.
device
.
cuda
.
get_device_name
():
return
return
if
os
.
getenv
(
"FLAGS_use_curand"
,
None
)
in
(
'0'
,
'False'
,
None
):
return
print
(
"Test Fixed Random number on V100 GPU------>"
)
print
(
"Test Fixed Random number on V100 GPU------>"
)
paddle
.
disable_static
()
paddle
.
disable_static
()
paddle
.
set_device
(
'gpu'
)
paddle
.
set_device
(
'gpu'
)
...
...
python/paddle/fluid/tests/unittests/test_gaussian_random_op.py
浏览文件 @
9714878c
...
@@ -342,9 +342,6 @@ class TestRandomValue(unittest.TestCase):
...
@@ -342,9 +342,6 @@ class TestRandomValue(unittest.TestCase):
if
not
"V100"
in
paddle
.
device
.
cuda
.
get_device_name
():
if
not
"V100"
in
paddle
.
device
.
cuda
.
get_device_name
():
return
return
if
os
.
getenv
(
"FLAGS_use_curand"
,
None
)
in
(
'0'
,
'False'
,
None
):
return
def
_check_random_value
(
dtype
,
expect
,
expect_mean
,
expect_std
):
def
_check_random_value
(
dtype
,
expect
,
expect_mean
,
expect_std
):
x
=
paddle
.
randn
([
32
,
3
,
1024
,
1024
],
dtype
=
dtype
)
x
=
paddle
.
randn
([
32
,
3
,
1024
,
1024
],
dtype
=
dtype
)
actual
=
x
.
numpy
()
actual
=
x
.
numpy
()
...
...
python/paddle/fluid/tests/unittests/test_linear.py
浏览文件 @
9714878c
...
@@ -73,6 +73,22 @@ class LinearTestCase(unittest.TestCase):
...
@@ -73,6 +73,22 @@ class LinearTestCase(unittest.TestCase):
np
.
testing
.
assert_array_almost_equal
(
res_f
,
res_nn
)
np
.
testing
.
assert_array_almost_equal
(
res_f
,
res_nn
)
np
.
testing
.
assert_array_almost_equal
(
res_nn
,
res_np
)
np
.
testing
.
assert_array_almost_equal
(
res_nn
,
res_np
)
def
test_weight_init
(
self
):
if
not
paddle
.
is_compiled_with_cuda
():
return
paddle
.
seed
(
100
)
linear
=
paddle
.
nn
.
Linear
(
2
,
3
,
weight_attr
=
paddle
.
nn
.
initializer
.
Normal
(
0
,
1.
))
paddle
.
nn
.
utils
.
_stride_column
(
linear
.
weight
)
expect
=
[[
1.4349908
,
-
0.8099171
,
-
2.64788
],
[
-
1.4981681
,
-
1.1784115
,
-
0.023253186
]]
self
.
assertTrue
(
np
.
allclose
(
linear
.
weight
.
numpy
(),
expect
))
linear
=
paddle
.
nn
.
Linear
(
2
,
3
)
expect
=
[[
0.73261100
,
0.43836895
,
0.07908206
],
[
0.85075015
,
-
1.04724526
,
0.64371765
]]
self
.
assertTrue
(
np
.
allclose
(
linear
.
weight
.
numpy
(),
expect
))
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_multinomial_op.py
浏览文件 @
9714878c
...
@@ -227,9 +227,6 @@ class TestRandomValue(unittest.TestCase):
...
@@ -227,9 +227,6 @@ class TestRandomValue(unittest.TestCase):
if
not
"V100"
in
paddle
.
device
.
cuda
.
get_device_name
():
if
not
"V100"
in
paddle
.
device
.
cuda
.
get_device_name
():
return
return
if
os
.
getenv
(
"FLAGS_use_curand"
,
None
)
in
(
'0'
,
'False'
,
None
):
return
print
(
"Test Fixed Random number on V100 GPU------>"
)
print
(
"Test Fixed Random number on V100 GPU------>"
)
paddle
.
disable_static
()
paddle
.
disable_static
()
paddle
.
set_device
(
'gpu'
)
paddle
.
set_device
(
'gpu'
)
...
...
python/paddle/fluid/tests/unittests/test_poisson_op.py
浏览文件 @
9714878c
...
@@ -107,9 +107,6 @@ class TestPoissonAPI(unittest.TestCase):
...
@@ -107,9 +107,6 @@ class TestPoissonAPI(unittest.TestCase):
if
not
paddle
.
is_compiled_with_cuda
():
if
not
paddle
.
is_compiled_with_cuda
():
return
return
if
os
.
getenv
(
"FLAGS_use_curand"
,
None
)
in
(
'0'
,
'False'
,
None
):
return
print
(
"Test Fixed Random number on GPU------>"
)
print
(
"Test Fixed Random number on GPU------>"
)
paddle
.
disable_static
()
paddle
.
disable_static
()
paddle
.
set_device
(
'gpu'
)
paddle
.
set_device
(
'gpu'
)
...
...
python/paddle/fluid/tests/unittests/test_randint_op.py
浏览文件 @
9714878c
...
@@ -198,9 +198,6 @@ class TestRandomValue(unittest.TestCase):
...
@@ -198,9 +198,6 @@ class TestRandomValue(unittest.TestCase):
if
not
"V100"
in
paddle
.
device
.
cuda
.
get_device_name
():
if
not
"V100"
in
paddle
.
device
.
cuda
.
get_device_name
():
return
return
if
os
.
getenv
(
"FLAGS_use_curand"
,
None
)
in
(
'0'
,
'False'
,
None
):
return
print
(
"Test Fixed Random number on GPU------>"
)
print
(
"Test Fixed Random number on GPU------>"
)
paddle
.
disable_static
()
paddle
.
disable_static
()
...
...
python/paddle/fluid/tests/unittests/test_randperm_op.py
浏览文件 @
9714878c
...
@@ -155,9 +155,6 @@ class TestRandomValue(unittest.TestCase):
...
@@ -155,9 +155,6 @@ class TestRandomValue(unittest.TestCase):
if
not
paddle
.
is_compiled_with_cuda
():
if
not
paddle
.
is_compiled_with_cuda
():
return
return
if
os
.
getenv
(
"FLAGS_use_curand"
,
None
)
in
(
'0'
,
'False'
,
None
):
return
print
(
"Test Fixed Random number on GPU------>"
)
print
(
"Test Fixed Random number on GPU------>"
)
paddle
.
disable_static
()
paddle
.
disable_static
()
paddle
.
set_device
(
'gpu'
)
paddle
.
set_device
(
'gpu'
)
...
...
python/paddle/fluid/tests/unittests/test_uniform_random_op.py
浏览文件 @
9714878c
...
@@ -573,37 +573,46 @@ class TestRandomValue(unittest.TestCase):
...
@@ -573,37 +573,46 @@ class TestRandomValue(unittest.TestCase):
if
not
"V100"
in
paddle
.
device
.
cuda
.
get_device_name
():
if
not
"V100"
in
paddle
.
device
.
cuda
.
get_device_name
():
return
return
if
os
.
getenv
(
"FLAGS_use_curand"
,
None
)
in
(
'0'
,
'False'
,
None
):
return
def
_check_random_value
(
dtype
,
expect
,
expect_mean
,
expect_std
):
x
=
paddle
.
rand
([
32
,
3
,
1024
,
1024
],
dtype
=
dtype
)
actual
=
x
.
numpy
()
self
.
assertTrue
(
np
.
allclose
(
actual
[
2
,
1
,
512
,
1000
:
1010
],
expect
))
self
.
assertEqual
(
np
.
mean
(
actual
),
expect_mean
)
self
.
assertEqual
(
np
.
std
(
actual
),
expect_std
)
print
(
"Test Fixed Random number on V100 GPU------>"
)
print
(
"Test Fixed Random number on V100 GPU------>"
)
paddle
.
disable_static
()
paddle
.
disable_static
()
paddle
.
set_device
(
'gpu'
)
paddle
.
set_device
(
'gpu'
)
paddle
.
seed
(
2021
)
paddle
.
seed
(
2021
)
expect_mean
=
0.50000454338820143895816272561205551028251647949218750
expect_std
=
0.28867379167297479991560749112977646291255950927734375
expect
=
[
expect
=
[
0.55298901
,
0.65184678
,
0.49375412
,
0.57943639
,
0.16459608
,
0.55298901
,
0.65184678
,
0.49375412
,
0.57943639
,
0.16459608
,
0.67181056
,
0.03021481
,
0.0238559
,
0.07742096
,
0.55972187
0.67181056
,
0.03021481
,
0.0238559
,
0.07742096
,
0.55972187
]
]
expect_mean
=
0.50000454338820143895816272561205551028251647949218750
out
=
paddle
.
rand
([
32
,
3
,
1024
,
1024
],
dtype
=
'float64'
).
numpy
()
expect_std
=
0.28867379167297479991560749112977646291255950927734375
self
.
assertEqual
(
np
.
mean
(
out
),
expect_mean
)
_check_random_value
(
core
.
VarDesc
.
VarType
.
FP64
,
expect
,
expect_mean
,
self
.
assertEqual
(
np
.
std
(
out
),
expect_std
)
expect_std
)
self
.
assertTrue
(
np
.
allclose
(
out
[
2
,
1
,
512
,
1000
:
1010
],
expect
)
)
expect_mean
=
0.50002604722976684570312500
expect_std
=
0.2886914908885955810546875
expect
=
[
expect
=
[
0.45320973
,
0.17582087
,
0.725341
,
0.30849215
,
0.622257
,
0.46352342
,
0.45320973
,
0.17582087
,
0.725341
,
0.30849215
,
0.622257
,
0.46352342
,
0.97228295
,
0.12771158
,
0.286525
,
0.9810645
0.97228295
,
0.12771158
,
0.286525
,
0.9810645
]
]
expect_mean
=
0.50002604722976684570312500
out
=
paddle
.
rand
([
32
,
3
,
1024
,
1024
],
dtype
=
'float32'
).
numpy
()
expect_std
=
0.2886914908885955810546875
self
.
assertEqual
(
np
.
mean
(
out
),
expect_mean
)
_check_random_value
(
core
.
VarDesc
.
VarType
.
FP32
,
expect
,
expect_mean
,
self
.
assertEqual
(
np
.
std
(
out
),
expect_std
)
expect_std
)
self
.
assertTrue
(
np
.
allclose
(
out
[
2
,
1
,
512
,
1000
:
1010
],
expect
))
expect_mean
=
25.11843109130859375
expect_std
=
43.370647430419921875
expect
=
[
30.089634
,
77.05225
,
3.1201615
,
68.34072
,
59.266724
,
-
25.33281
,
12.973292
,
27.41127
,
-
17.412298
,
27.931019
]
out
=
paddle
.
empty
(
[
16
,
16
,
16
,
16
],
dtype
=
'float32'
).
uniform_
(
-
50
,
100
).
numpy
()
self
.
assertEqual
(
np
.
mean
(
out
),
expect_mean
)
self
.
assertEqual
(
np
.
std
(
out
),
expect_std
)
self
.
assertTrue
(
np
.
allclose
(
out
[
10
,
10
,
10
,
0
:
10
],
expect
))
paddle
.
enable_static
()
paddle
.
enable_static
()
...
...
python/paddle/nn/utils/__init__.py
浏览文件 @
9714878c
...
@@ -14,7 +14,7 @@
...
@@ -14,7 +14,7 @@
from
.spectral_norm_hook
import
spectral_norm
from
.spectral_norm_hook
import
spectral_norm
from
.weight_norm_hook
import
weight_norm
,
remove_weight_norm
# noqa: F401
from
.weight_norm_hook
import
weight_norm
,
remove_weight_norm
# noqa: F401
from
.transform_parameters
import
parameters_to_vector
,
vector_to_parameters
# noqa: F401
from
.transform_parameters
import
parameters_to_vector
,
vector_to_parameters
,
_stride_column
# noqa: F401
__all__
=
[
#noqa
__all__
=
[
#noqa
'weight_norm'
,
'remove_weight_norm'
,
'spectral_norm'
,
'parameters_to_vector'
,
'vector_to_parameters'
'weight_norm'
,
'remove_weight_norm'
,
'spectral_norm'
,
'parameters_to_vector'
,
'vector_to_parameters'
...
...
python/paddle/nn/utils/transform_parameters.py
浏览文件 @
9714878c
...
@@ -36,6 +36,39 @@ def _inplace_reshape_dygraph(x, shape):
...
@@ -36,6 +36,39 @@ def _inplace_reshape_dygraph(x, shape):
stop_gradient
=
True
)
stop_gradient
=
True
)
@
dygraph_only
def
_stride_column
(
param
):
"""
A tool function. Permute date of parameter as a 'columns' stride. Now, it only support 2-D parameter.
Args:
param(Tensor]): The param that will be strided according to 'columns'.
Examples:
.. code-block:: python
import paddle
paddle.seed(100)
linear = paddle.nn.Linear(2, 3)
print(linear.weight)
# [[-0.31485492, -1.02896988, 0.45741916],
# [-0.65525872, -1.04643178, 1.07262802]]
paddle.nn.utils.stride_column(linear.weight)
print(linear.weight)
# [[-0.31485492, 0.45741916, -1.04643178],
# [-1.02896988, -0.65525872, 1.07262802]]
"""
assert
len
(
param
.
shape
)
==
2
shape
=
[
param
.
shape
[
1
],
param
.
shape
[
0
]]
with
paddle
.
fluid
.
dygraph
.
no_grad
():
reshape_var
=
paddle
.
reshape
(
param
,
shape
)
transpose_var
=
paddle
.
transpose
(
reshape_var
,
[
1
,
0
])
transpose_var
.
_share_underline_tensor_to
(
param
)
@
dygraph_only
@
dygraph_only
def
parameters_to_vector
(
parameters
,
name
=
None
):
def
parameters_to_vector
(
parameters
,
name
=
None
):
"""
"""
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录