Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
jobily
Paddle
提交
29a9f9b5
P
Paddle
项目概览
jobily
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
29a9f9b5
编写于
11月 02, 2017
作者:
D
dangqingqing
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Refine code format and fix threads number.
上级
5a4cdbb3
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
41 addition
and
41 deletion
+41
-41
paddle/operators/math/detail/activation_functions.h
paddle/operators/math/detail/activation_functions.h
+28
-28
paddle/operators/math/detail/avx_functions.cc
paddle/operators/math/detail/avx_functions.cc
+11
-11
paddle/operators/math/detail/lstm_gpu_kernel.h
paddle/operators/math/detail/lstm_gpu_kernel.h
+2
-2
未找到文件。
paddle/operators/math/detail/activation_functions.h
浏览文件 @
29a9f9b5
...
...
@@ -32,17 +32,17 @@ namespace detail {
namespace
forward
{
template
<
typename
T
>
DEVICE
T
linear
(
const
T
a
)
{
DEVICE
T
Identity
(
const
T
a
)
{
return
a
;
}
template
<
typename
T
>
DEVICE
T
r
elu
(
const
T
a
)
{
DEVICE
T
R
elu
(
const
T
a
)
{
return
a
>
static_cast
<
T
>
(
0.0
)
?
a
:
static_cast
<
T
>
(
0.0
);
}
template
<
typename
T
>
DEVICE
T
s
igmoid
(
const
T
a
)
{
DEVICE
T
S
igmoid
(
const
T
a
)
{
const
T
min
=
SIGMOID_THRESHOLD_MIN
;
const
T
max
=
SIGMOID_THRESHOLD_MAX
;
T
tmp
=
(
a
<
min
)
?
min
:
((
a
>
max
)
?
max
:
a
);
...
...
@@ -50,7 +50,7 @@ DEVICE T sigmoid(const T a) {
}
template
<
typename
T
>
DEVICE
T
t
anh
(
const
T
a
)
{
DEVICE
T
T
anh
(
const
T
a
)
{
T
tmp
=
-
2.0
*
a
;
tmp
=
(
tmp
>
EXP_MAX_INPUT
)
?
EXP_MAX_INPUT
:
tmp
;
return
(
2.0
/
(
1.0
+
exp
(
tmp
)))
-
1.0
;
...
...
@@ -61,22 +61,22 @@ DEVICE T tanh(const T a) {
namespace
backward
{
template
<
typename
T
>
DEVICE
T
linear
(
const
T
a
,
const
T
b
)
{
DEVICE
T
Identity
(
const
T
a
,
const
T
b
)
{
return
a
;
}
template
<
typename
T
>
DEVICE
T
r
elu
(
const
T
a
,
const
T
b
)
{
DEVICE
T
R
elu
(
const
T
a
,
const
T
b
)
{
return
a
*
(
b
>
0.0
?
1.0
:
0.0
);
}
template
<
typename
T
>
DEVICE
T
s
igmoid
(
const
T
a
,
const
T
b
)
{
DEVICE
T
S
igmoid
(
const
T
a
,
const
T
b
)
{
return
a
*
b
*
(
1.0
-
b
);
}
template
<
typename
T
>
DEVICE
T
t
anh
(
const
T
a
,
const
T
b
)
{
DEVICE
T
T
anh
(
const
T
a
,
const
T
b
)
{
return
a
*
(
1.0
-
b
*
b
);
}
...
...
@@ -89,20 +89,20 @@ struct Active {
};
static
DEVICE
Active
<
float
>::
Act
kActFloat
[]
=
{
&
forward
::
sigmoid
<
float
>
,
&
forward
::
relu
<
float
>
,
&
forward
::
t
anh
<
float
>
,
&
forward
::
linear
<
float
>
};
&
forward
::
Sigmoid
<
float
>
,
&
forward
::
Relu
<
float
>
,
&
forward
::
T
anh
<
float
>
,
&
forward
::
Identity
<
float
>
};
static
DEVICE
Active
<
float
>::
ActGrad
kActGradFloat
[]
=
{
&
backward
::
sigmoid
<
float
>
,
&
backward
::
relu
<
float
>
,
&
backward
::
t
anh
<
float
>
,
&
backward
::
linear
<
float
>
};
&
backward
::
Sigmoid
<
float
>
,
&
backward
::
Relu
<
float
>
,
&
backward
::
T
anh
<
float
>
,
&
backward
::
Identity
<
float
>
};
static
DEVICE
Active
<
double
>::
Act
kActDouble
[]
=
{
&
forward
::
sigmoid
<
double
>
,
&
forward
::
relu
<
double
>
,
&
forward
::
t
anh
<
double
>
,
&
forward
::
linear
<
double
>
};
&
forward
::
Sigmoid
<
double
>
,
&
forward
::
Relu
<
double
>
,
&
forward
::
T
anh
<
double
>
,
&
forward
::
Identity
<
double
>
};
static
DEVICE
Active
<
double
>::
ActGrad
kActGradDouble
[]
=
{
&
backward
::
sigmoid
<
double
>
,
&
backward
::
r
elu
<
double
>
,
&
backward
::
tanh
<
double
>
,
&
backward
::
linear
<
double
>
};
&
backward
::
Sigmoid
<
double
>
,
&
backward
::
R
elu
<
double
>
,
&
backward
::
Tanh
<
double
>
,
&
backward
::
Identity
<
double
>
};
namespace
forward
{
inline
DEVICE
float
activation
(
float
a
,
int
index
)
{
...
...
@@ -128,29 +128,29 @@ inline DEVICE double activation(double a, double b, int index) {
#ifdef __AVX__
namespace
forward
{
namespace
avx
{
__m256
r
elu
(
const
__m256
a
);
__m256
s
igmoid
(
const
__m256
a
);
__m256
t
anh
(
const
__m256
a
);
__m256
linear
(
const
__m256
a
);
__m256
R
elu
(
const
__m256
a
);
__m256
S
igmoid
(
const
__m256
a
);
__m256
T
anh
(
const
__m256
a
);
__m256
Identity
(
const
__m256
a
);
}
// namespace avx
}
// namespace forward
namespace
backward
{
namespace
avx
{
__m256
r
elu
(
const
__m256
a
,
const
__m256
b
);
__m256
s
igmoid
(
const
__m256
a
,
const
__m256
b
);
__m256
t
anh
(
const
__m256
a
,
const
__m256
b
);
__m256
linear
(
const
__m256
a
,
const
__m256
b
);
__m256
R
elu
(
const
__m256
a
,
const
__m256
b
);
__m256
S
igmoid
(
const
__m256
a
,
const
__m256
b
);
__m256
T
anh
(
const
__m256
a
,
const
__m256
b
);
__m256
Identity
(
const
__m256
a
,
const
__m256
b
);
}
// namespace avx
}
// namespace backward
static
Active
<
__m256
>::
Act
kActAvx
[]
=
{
&
forward
::
avx
::
sigmoid
,
&
forward
::
avx
::
relu
,
&
forward
::
avx
::
t
anh
,
&
forward
::
avx
::
linear
};
&
forward
::
avx
::
Sigmoid
,
&
forward
::
avx
::
Relu
,
&
forward
::
avx
::
T
anh
,
&
forward
::
avx
::
Identity
};
static
Active
<
__m256
>::
ActGrad
kActGradAvx
[]
=
{
&
backward
::
avx
::
sigmoid
,
&
backward
::
avx
::
relu
,
&
backward
::
avx
::
t
anh
,
&
backward
::
avx
::
linear
};
&
backward
::
avx
::
Sigmoid
,
&
backward
::
avx
::
Relu
,
&
backward
::
avx
::
T
anh
,
&
backward
::
avx
::
Identity
};
namespace
forward
{
inline
__m256
activation
(
__m256
a
,
int
index
)
{
return
kActAvx
[
index
](
a
);
}
...
...
paddle/operators/math/detail/avx_functions.cc
浏览文件 @
29a9f9b5
...
...
@@ -22,61 +22,61 @@ namespace operators {
namespace
math
{
namespace
detail
{
__m256
e
xp
(
__m256
a
)
{
return
exp256_ps
(
a
);
}
__m256
E
xp
(
__m256
a
)
{
return
exp256_ps
(
a
);
}
namespace
forward
{
namespace
avx
{
__m256
r
elu
(
const
__m256
a
)
{
__m256
R
elu
(
const
__m256
a
)
{
__m256
tmp
=
_mm256_set1_ps
(
0.0
f
);
return
_mm256_max_ps
(
a
,
tmp
);
}
__m256
s
igmoid
(
const
__m256
a
)
{
__m256
S
igmoid
(
const
__m256
a
)
{
__m256
max
=
_mm256_set1_ps
(
SIGMOID_THRESHOLD_MAX
);
__m256
min
=
_mm256_set1_ps
(
SIGMOID_THRESHOLD_MIN
);
__m256
tmp
=
_mm256_max_ps
(
a
,
min
);
tmp
=
_mm256_min_ps
(
tmp
,
max
);
tmp
=
_mm256_sub_ps
(
_mm256_set1_ps
(
0.0
f
),
tmp
);
tmp
=
e
xp
(
tmp
);
tmp
=
E
xp
(
tmp
);
tmp
=
_mm256_add_ps
(
_mm256_set1_ps
(
1.0
f
),
tmp
);
tmp
=
_mm256_div_ps
(
_mm256_set1_ps
(
1.0
f
),
tmp
);
return
tmp
;
}
__m256
t
anh
(
const
__m256
a
)
{
__m256
T
anh
(
const
__m256
a
)
{
__m256
max
=
_mm256_set1_ps
(
EXP_MAX_INPUT
);
__m256
tmp
=
_mm256_mul_ps
(
_mm256_set1_ps
(
-
2.0
f
),
a
);
tmp
=
_mm256_min_ps
(
tmp
,
max
);
tmp
=
e
xp
(
tmp
);
tmp
=
E
xp
(
tmp
);
return
_mm256_sub_ps
(
_mm256_div_ps
(
_mm256_set1_ps
(
2.0
f
),
_mm256_add_ps
(
_mm256_set1_ps
(
1.0
f
),
tmp
)),
_mm256_set1_ps
(
1.0
f
));
}
__m256
linear
(
const
__m256
a
)
{
return
a
;
}
__m256
Identity
(
const
__m256
a
)
{
return
a
;
}
}
// namespace avx
}
// namespace forward
namespace
backward
{
namespace
avx
{
__m256
r
elu
(
const
__m256
a
,
const
__m256
b
)
{
__m256
R
elu
(
const
__m256
a
,
const
__m256
b
)
{
return
_mm256_mul_ps
(
a
,
_mm256_and_ps
(
_mm256_cmp_ps
(
b
,
_mm256_set1_ps
(
0.0
f
),
_CMP_GT_OS
),
_mm256_set1_ps
(
1.0
f
)));
}
__m256
s
igmoid
(
const
__m256
a
,
const
__m256
b
)
{
__m256
S
igmoid
(
const
__m256
a
,
const
__m256
b
)
{
return
_mm256_mul_ps
(
_mm256_mul_ps
(
a
,
b
),
_mm256_sub_ps
(
_mm256_set1_ps
(
1.0
f
),
b
));
}
__m256
t
anh
(
const
__m256
a
,
const
__m256
b
)
{
__m256
T
anh
(
const
__m256
a
,
const
__m256
b
)
{
return
_mm256_mul_ps
(
a
,
_mm256_sub_ps
(
_mm256_set1_ps
(
1.0
f
),
_mm256_mul_ps
(
b
,
b
)));
}
__m256
linear
(
const
__m256
a
,
const
__m256
b
)
{
return
a
;
}
__m256
Identity
(
const
__m256
a
,
const
__m256
b
)
{
return
a
;
}
}
// namespace avx
}
// namespace backward
...
...
paddle/operators/math/detail/lstm_gpu_kernel.h
浏览文件 @
29a9f9b5
...
...
@@ -226,9 +226,9 @@ void gpu_lstm_backward(const platform::DeviceContext& context, Op op,
threads
=
dim3
(
framePerBlock
,
1
);
grid
=
dim3
(
frameBlocks
,
1
);
}
else
{
/* framePerBlock = 32 batchPerBlock =
32
*/
/* framePerBlock = 32 batchPerBlock =
16
*/
threads
=
dim3
(
32
,
16
);
grid
=
dim3
((
frameSize
+
32
-
1
)
/
32
,
(
batchSize
+
32
-
1
)
/
32
);
grid
=
dim3
((
frameSize
+
32
-
1
)
/
32
,
(
batchSize
+
16
-
1
)
/
16
);
}
auto
stream
=
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录