Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
49dedfad
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
49dedfad
编写于
5月 02, 2018
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Polish code and tests
上级
c888e016
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
59 addition
and
37 deletion
+59
-37
paddle/fluid/operators/math/blas_impl.cu.h
paddle/fluid/operators/math/blas_impl.cu.h
+14
-3
paddle/fluid/operators/math/math_function_test.cc
paddle/fluid/operators/math/math_function_test.cc
+11
-6
paddle/fluid/operators/math/math_function_test.cu
paddle/fluid/operators/math/math_function_test.cu
+34
-28
未找到文件。
paddle/fluid/operators/math/blas_impl.cu.h
浏览文件 @
49dedfad
...
...
@@ -42,9 +42,20 @@ struct CUBlas<double> {
template
<
>
struct
CUBlas
<
platform
::
float16
>
{
template
<
typename
...
ARGS
>
static
void
GEMM
(
ARGS
...
args
)
{
PADDLE_ENFORCE
(
platform
::
dynload
::
cublasHgemm
(
args
...));
using
float16
=
platform
::
float16
;
static
void
GEMM
(
cublasHandle_t
handle
,
cublasOperation_t
transa
,
cublasOperation_t
transb
,
int
m
,
int
n
,
int
k
,
const
float16
*
alpha
,
const
float16
*
A
,
int
lda
,
const
float16
*
B
,
int
ldb
,
const
float16
*
beta
,
float16
*
C
,
int
ldc
)
{
PADDLE_ENFORCE
(
platform
::
dynload
::
cublasHgemm
(
handle
,
transa
,
transb
,
m
,
n
,
k
,
reinterpret_cast
<
const
__half
*>
(
alpha
),
reinterpret_cast
<
const
__half
*>
(
A
),
lda
,
reinterpret_cast
<
const
__half
*>
(
B
),
ldb
,
reinterpret_cast
<
const
__half
*>
(
beta
),
reinterpret_cast
<
__half
*>
(
C
),
ldc
));
}
};
...
...
paddle/fluid/operators/math/math_function_test.cc
浏览文件 @
49dedfad
...
...
@@ -14,6 +14,13 @@
#include "paddle/fluid/operators/math/math_function.h"
#include "gtest/gtest.h"
template
<
typename
T
>
inline
paddle
::
operators
::
math
::
BlasT
<
paddle
::
platform
::
CPUDeviceContext
,
T
>
GetBlas
(
const
paddle
::
platform
::
CPUDeviceContext
&
context
)
{
return
paddle
::
operators
::
math
::
GetBlas
<
paddle
::
platform
::
CPUDeviceContext
,
T
>
(
context
);
}
TEST
(
math_function
,
gemm_notrans_cblas
)
{
paddle
::
framework
::
Tensor
input1
;
paddle
::
framework
::
Tensor
input2
;
...
...
@@ -34,9 +41,8 @@ TEST(math_function, gemm_notrans_cblas) {
memcpy
(
input3_ptr
,
arr3
,
8
*
sizeof
(
float
));
paddle
::
platform
::
CPUDeviceContext
context
(
*
cpu_place
);
paddle
::
operators
::
math
::
gemm
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
(
context
,
false
,
false
,
m
,
n
,
k
,
1
,
input1_ptr
,
3
,
input2_ptr
+
1
,
4
,
1
,
input3_ptr
+
1
,
4
);
GetBlas
<
float
>
(
context
).
GEMM
(
false
,
false
,
m
,
n
,
k
,
1
,
input1_ptr
,
3
,
input2_ptr
+
1
,
4
,
1
,
input3_ptr
+
1
,
4
);
EXPECT_EQ
(
input3_ptr
[
0
],
0
);
EXPECT_EQ
(
input3_ptr
[
1
],
24
);
...
...
@@ -68,9 +74,8 @@ TEST(math_function, gemm_trans_clbas) {
memcpy
(
input3_ptr
,
arr3
,
8
*
sizeof
(
float
));
paddle
::
platform
::
CPUDeviceContext
context
(
*
cpu_place
);
paddle
::
operators
::
math
::
gemm
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
(
context
,
false
,
true
,
m
,
n
,
k
,
1
,
input1_ptr
,
3
,
input2_ptr
+
3
,
3
,
1
,
input3_ptr
+
1
,
4
);
GetBlas
<
float
>
(
context
).
GEMM
(
false
,
true
,
m
,
n
,
k
,
1
,
input1_ptr
,
3
,
input2_ptr
+
3
,
3
,
1
,
input3_ptr
+
1
,
4
);
EXPECT_EQ
(
input3_ptr
[
0
],
0
);
EXPECT_EQ
(
input3_ptr
[
1
],
24
);
...
...
paddle/fluid/operators/math/math_function_test.cu
浏览文件 @
49dedfad
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#include "gtest/gtest.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/device_context.h"
void
fill_fp16_data
(
paddle
::
platform
::
float16
*
in_ptr
,
size_t
size
,
const
std
::
vector
<
float
>&
data
)
{
...
...
@@ -23,8 +24,8 @@ void fill_fp16_data(paddle::platform::float16* in_ptr, size_t size,
}
TEST
(
math_function
,
notrans_mul_trans_fp32
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
using
namespace
paddle
::
framework
;
// NOLINT
using
namespace
paddle
::
platform
;
// NOLINT
Tensor
input1
;
Tensor
input1_gpu
;
...
...
@@ -59,8 +60,8 @@ TEST(math_function, notrans_mul_trans_fp32) {
}
TEST
(
math_function
,
notrans_mul_trans_fp16
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
using
namespace
paddle
::
framework
;
// NOLINT
using
namespace
paddle
::
platform
;
// NOLINT
Tensor
input1
;
Tensor
input1_gpu
;
...
...
@@ -100,8 +101,8 @@ TEST(math_function, notrans_mul_trans_fp16) {
}
TEST
(
math_function
,
trans_mul_notrans_fp32
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
using
namespace
paddle
::
framework
;
// NOLINT
using
namespace
paddle
::
platform
;
// NOLINT
Tensor
input1
;
Tensor
input1_gpu
;
...
...
@@ -141,8 +142,8 @@ TEST(math_function, trans_mul_notrans_fp32) {
}
TEST
(
math_function
,
trans_mul_notrans_fp16
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
using
namespace
paddle
::
framework
;
// NOLINT
using
namespace
paddle
::
platform
;
// NOLINT
Tensor
input1
;
Tensor
input1_gpu
;
...
...
@@ -186,9 +187,16 @@ TEST(math_function, trans_mul_notrans_fp16) {
EXPECT_EQ
(
static_cast
<
float
>
(
out_ptr
[
8
]),
29
);
}
template
<
typename
T
>
inline
paddle
::
operators
::
math
::
BlasT
<
paddle
::
platform
::
CUDADeviceContext
,
T
>
GetBlas
(
const
paddle
::
platform
::
CUDADeviceContext
&
context
)
{
return
paddle
::
operators
::
math
::
GetBlas
<
paddle
::
platform
::
CUDADeviceContext
,
T
>
(
context
);
}
TEST
(
math_function
,
gemm_notrans_cublas_fp32
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
using
namespace
paddle
::
framework
;
// NOLINT
using
namespace
paddle
::
platform
;
// NOLINT
Tensor
input1
;
Tensor
input2
;
...
...
@@ -221,8 +229,8 @@ TEST(math_function, gemm_notrans_cublas_fp32) {
float
*
b
=
input2_gpu
.
data
<
float
>
();
float
*
c
=
input3_gpu
.
mutable_data
<
float
>
(
gpu_place
);
paddle
::
operators
::
math
::
gemm
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
(
context
,
false
,
false
,
m
,
n
,
k
,
1
,
a
,
3
,
b
+
1
,
4
,
1
,
c
+
1
,
4
);
GetBlas
<
float
>
(
context
).
GEMM
(
false
,
false
,
m
,
n
,
k
,
1
,
a
,
3
,
b
+
1
,
4
,
1
,
c
+
1
,
4
);
TensorCopySync
(
input3_gpu
,
cpu_place
,
&
input3
);
...
...
@@ -244,8 +252,8 @@ TEST(math_function, gemm_notrans_cublas_fp32) {
}
TEST
(
math_function
,
gemm_notrans_cublas_fp16
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
using
namespace
paddle
::
framework
;
// NOLINT
using
namespace
paddle
::
platform
;
// NOLINT
Tensor
input1
;
Tensor
input2
;
...
...
@@ -281,9 +289,8 @@ TEST(math_function, gemm_notrans_cublas_fp16) {
float16
*
b
=
input2_gpu
.
data
<
float16
>
();
float16
*
c
=
input3_gpu
.
mutable_data
<
float16
>
(
gpu_place
);
paddle
::
operators
::
math
::
gemm
<
paddle
::
platform
::
CUDADeviceContext
,
float16
>
(
context
,
false
,
false
,
m
,
n
,
k
,
float16
(
1
),
a
,
3
,
b
+
1
,
4
,
float16
(
1
),
c
+
1
,
4
);
GetBlas
<
float16
>
(
context
).
GEMM
(
false
,
false
,
m
,
n
,
k
,
float16
(
1
),
a
,
3
,
b
+
1
,
4
,
float16
(
1
),
c
+
1
,
4
);
TensorCopySync
(
input3_gpu
,
cpu_place
,
&
input3
);
...
...
@@ -305,8 +312,8 @@ TEST(math_function, gemm_notrans_cublas_fp16) {
}
TEST
(
math_function
,
gemm_trans_cublas_fp32
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
using
namespace
paddle
::
framework
;
// NOLINT
using
namespace
paddle
::
platform
;
// NOLINT
Tensor
input1
;
Tensor
input2
;
...
...
@@ -339,8 +346,8 @@ TEST(math_function, gemm_trans_cublas_fp32) {
float
*
b
=
input2_gpu
.
data
<
float
>
();
float
*
c
=
input3_gpu
.
mutable_data
<
float
>
(
gpu_place
);
paddle
::
operators
::
math
::
gemm
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
(
context
,
false
,
true
,
m
,
n
,
k
,
1
,
a
,
3
,
b
+
3
,
3
,
1
,
c
+
1
,
4
);
GetBlas
<
float
>
(
context
).
GEMM
(
false
,
true
,
m
,
n
,
k
,
1
,
a
,
3
,
b
+
3
,
3
,
1
,
c
+
1
,
4
);
TensorCopySync
(
input3_gpu
,
cpu_place
,
&
input3
);
...
...
@@ -356,8 +363,8 @@ TEST(math_function, gemm_trans_cublas_fp32) {
}
TEST
(
math_function
,
gemm_trans_cublas_fp16
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
using
namespace
paddle
::
framework
;
// NOLINT
using
namespace
paddle
::
platform
;
// NOLINT
Tensor
input1
;
Tensor
input2
;
...
...
@@ -393,9 +400,8 @@ TEST(math_function, gemm_trans_cublas_fp16) {
float16
*
b
=
input2_gpu
.
data
<
float16
>
();
float16
*
c
=
input3_gpu
.
mutable_data
<
float16
>
(
gpu_place
);
paddle
::
operators
::
math
::
gemm
<
paddle
::
platform
::
CUDADeviceContext
,
float16
>
(
context
,
false
,
true
,
m
,
n
,
k
,
float16
(
1
),
a
,
3
,
b
+
3
,
3
,
float16
(
1
),
c
+
1
,
4
);
GetBlas
<
float16
>
(
context
).
GEMM
(
false
,
true
,
m
,
n
,
k
,
float16
(
1
),
a
,
3
,
b
+
3
,
3
,
float16
(
1
),
c
+
1
,
4
);
TensorCopySync
(
input3_gpu
,
cpu_place
,
&
input3
);
...
...
@@ -412,8 +418,8 @@ TEST(math_function, gemm_trans_cublas_fp16) {
template
<
typename
T
>
void
GemvTest
(
int
m
,
int
n
,
bool
trans
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
using
namespace
paddle
::
framework
;
// NOLINT
using
namespace
paddle
::
platform
;
// NOLINT
Tensor
mat_a
;
Tensor
vec_b
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录