Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
7c274dc0
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
7c274dc0
编写于
8月 21, 2017
作者:
Q
qijun
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
use curand
上级
d525abed
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
70 addition
and
24 deletion
+70
-24
paddle/operators/math/math_function.cc
paddle/operators/math/math_function.cc
+9
-0
paddle/operators/math/math_function.cu
paddle/operators/math/math_function.cu
+40
-16
paddle/operators/math/math_function.h
paddle/operators/math/math_function.h
+8
-0
paddle/platform/device_context.cc
paddle/platform/device_context.cc
+10
-5
paddle/platform/device_context.h
paddle/platform/device_context.h
+3
-3
未找到文件。
paddle/operators/math/math_function.cc
浏览文件 @
7c274dc0
...
...
@@ -109,6 +109,15 @@ void matmul<platform::CPUPlace, double>(const framework::Tensor& matrix_a,
matrix_b
.
data
<
double
>
(),
beta
,
matrix_out
->
data
<
double
>
(),
context
);
}
template
<
>
void
Set
<
typename
CPUPlace
,
typename
float
>
(
const
int
n
,
const
float
alpha
,
float
*
output
,
platform
::
DeviceContext
*
context
)
{
auto
*
cpu_context
=
reinterpret_cast
<
platform
::
CPUDeviceContext
*>
(
context
);
framework
::
EigenVector
::
Type
<
T
>
out
(
output
,
n
);
out
.
device
(
*
(
cpu_context
->
eigen_device
()))
=
t
.
constant
(
T
(
alpha
));
}
template
<
>
void
RandUniform
<
platform
::
CPUPlace
,
float
>
(
const
int
n
,
const
float
min
,
const
float
max
,
float
*
output
,
...
...
paddle/operators/math/math_function.cu
浏览文件 @
7c274dc0
...
...
@@ -126,20 +126,48 @@ void matmul<platform::GPUPlace, double>(const framework::Tensor& matrix_a,
matrix_b
.
data
<
double
>
(),
beta
,
matrix_out
->
data
<
double
>
(),
context
);
}
template
<
>
void
Set
<
typename
GPUPlace
,
typename
float
>
(
const
int
n
,
const
float
alpha
,
float
*
output
,
platform
::
DeviceContext
*
context
)
{
auto
*
cuda_context
=
reinterpret_cast
<
platform
::
CUDADeviceContext
*>
(
context
);
framework
::
EigenVector
::
Type
<
T
>
out
(
output
,
n
);
out
.
device
(
*
(
cuda_context
->
eigen_device
()))
=
t
.
constant
(
T
(
alpha
));
}
template
<
typename
T
>
__global__
void
UniformShift
(
const
int
n
,
const
T
min
,
const
T
max
,
T
*
x
)
{
float
scale
=
max
-
min
;
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
n
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
x
[
i
]
=
x
[
i
]
*
scale
+
min
;
}
}
template
<
>
void
RandUniform
<
platform
::
GPUPlace
,
float
>
(
const
int
n
,
const
float
min
,
const
float
max
,
float
*
output
,
platform
::
DeviceContext
*
context
)
{
auto
*
cuda_context
=
reinterpret_cast
<
platform
::
CUDADeviceContext
*>
(
context
);
thrust
::
uniform_real_distribution
<
float
>
distribution
(
min
,
max
);
thrust
::
minstd_rand
engine
=
cuda_context
->
rand_enigne
();
engine
->
discard
(
n
);
thrust
::
counting_iterator
<
unsigned
int
>
index_sequence_begin
(
0
);
PADDLE_ENFORCE
(
curandGenerateUniform
(
cuda_context
->
curand_generator
(),
output
,
n
));
int
block
=
512
;
int
grid
=
(
n
+
block
-
1
)
/
block
;
UniformShift
<
float
><<<
grid
,
block
,
0
,
cuda_context
->
stream
()
>>>
(
n
,
min
,
max
,
output
);
}
thrust
::
transform
(
thrust
::
cuda
::
par
.
on
(
cuda_context
->
stream
()),
index_sequence_begin
,
index_sequence_begin
+
n
,
thrust
::
device_ptr
<
float
>
(
output
),
distribution
(
engine
));
template
<
typename
T
>
int
HandleOddLengthRandGaussian
(
const
int
n
,
const
T
mean
,
const
T
std
,
T
*
output
,
CUDADeviceContext
*
context
)
{
if
(
n
%
2
==
1
)
{
std
::
default_random_engine
generator
;
std
::
normal_distribution
<
T
>
distribution
(
mean
,
std
);
const
T
random_value
=
distribution
(
generator
);
Set
<
T
,
platform
::
GPUPlace
>
(
1
,
random_value
,
output
+
(
n
-
1
),
context
);
return
n
-
1
;
}
return
n
;
}
template
<
>
...
...
@@ -147,15 +175,11 @@ void RandGaussian<platform::GPUPlace, float>(const int n, const float mean,
const
float
std
,
float
*
output
,
platform
::
DeviceContext
*
context
)
{
auto
*
cuda_context
=
reinterpret_cast
<
platform
::
CUDADeviceContext
*>
(
context
);
thrust
::
normal_distribution
<
float
>
distribution
(
mean
,
std
);
thrust
::
minstd_rand
engine
=
cuda_context
->
rand_enigne
();
engine
->
discard
(
n
);
thrust
::
counting_iterator
<
unsigned
int
>
index_sequence_begin
(
0
);
thrust
::
transform
(
thrust
::
cuda
::
par
.
on
(
cuda_context
->
stream
()),
index_sequence_begin
,
index_sequence_begin
+
n
,
thrust
::
device_ptr
<
float
>
(
output
),
distribution
(
engine
));
const
int
even_n
=
HandleOddLengthRandGaussian
<
float
>
(
n
,
mean
,
std
,
output
,
cuda_context
);
PADDLE_ENFORCE
(
curandGenerateNormal
(
cuda_context
->
curand_generator
(),
output
,
even_n
,
mean
,
std
));
}
}
// namespace math
...
...
paddle/operators/math/math_function.h
浏览文件 @
7c274dc0
...
...
@@ -54,6 +54,7 @@ int LAPACKE_dgetri(int matrix_layout, int n, double* a, int lda,
#include "paddle/framework/tensor.h"
#include "paddle/platform/device_context.h"
#include "paddle/platform/eigen.h"
#include "paddle/platform/enforce.h"
namespace
paddle
{
...
...
@@ -77,6 +78,13 @@ void matmul(const framework::Tensor& matrix_a, bool trans_a,
framework
::
Tensor
*
matrix_out
,
T
beta
,
platform
::
DeviceContext
*
context
);
template
<
typename
Place
,
typename
T
>
void
Set
(
const
int
n
,
const
T
alpha
,
T
*
output
,
platform
::
DeviceContext
*
context
)
{
framework
::
EigenVector
::
Type
<
T
>
out
(
output
,
n
);
out
.
device
(
*
(
context
->
eigen_device
()))
=
t
.
constant
(
T
(
alpha
));
}
template
<
typename
Place
,
typename
T
>
void
RandUniform
(
const
int
n
,
const
T
min
,
const
T
max
,
T
*
output
,
platform
::
DeviceContext
*
context
);
...
...
paddle/platform/device_context.cc
浏览文件 @
7c274dc0
...
...
@@ -157,12 +157,17 @@ cudnnHandle_t CUDADeviceContext::cudnn_handle() {
return
cudnn_handle_
;
}
thrust
::
minstd_rand
&
CPUDeviceContext
::
rand_engine
()
{
if
(
!
rand_engine_
)
{
rand_engine_
.
reset
(
new
thrust
::
minstd_rand
());
rand_engine_
->
seed
(
rand_seed_
);
curandGenerator_t
CUDADeviceContext
::
curand_generator
()
{
if
(
!
curand_generator_
)
{
SetDeviceId
(
place_
.
device
);
PADDLE_ENFORCE
(
dynload
::
curandCreateGenerator
(
&
curand_generator_
,
CURAND_RNG_PSEUDO_DEFAULT
));
PADDLE_ENFORCE
(
dynload
::
curandSetPseudoRandomGeneratorSeed
(
curand_generator_
,
seed_
));
PADDLE_ENFORCE
(
dynload
::
curandSetStream
(
curand_generator_
,
stream_
));
}
return
*
(
rand_engine_
.
get
())
;
return
curand_generator_
;
}
cudaStream_t
CUDADeviceContext
::
stream
()
{
return
stream_
;
}
...
...
paddle/platform/device_context.h
浏览文件 @
7c274dc0
...
...
@@ -15,10 +15,9 @@ limitations under the License. */
#include "paddle/platform/place.h"
#ifndef PADDLE_ONLY_CPU
#include <thrust/device_ptr.h>
#include <thrust/random.h>
#include "paddle/platform/dynload/cublas.h"
#include "paddle/platform/dynload/cudnn.h"
#include "paddle/platform/dynload/curand.h"
#include "paddle/platform/gpu_info.h"
#define EIGEN_USE_GPU
#endif
...
...
@@ -80,7 +79,8 @@ class CUDADeviceContext : public DeviceContext {
/*! \brief Return cudnn handle in the device context. */
cudnnHandle_t
cudnn_handle
();
thrust
::
minstd_rand
&
CPUDeviceContext
::
rand_engine
();
/*! \brief Return curand handle in the device context. */
curandGenerator_t
curand_generator
();
/*! \brief Return cuda stream in the device context. */
cudaStream_t
stream
();
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录