Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
74e0eb72
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
74e0eb72
编写于
2月 15, 2018
作者:
K
kexinzhao
提交者:
GitHub
2月 15, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
make float16 a pod type (#8456)
上级
74404fad
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
99 additions
and
14 deletions
+99
-14
paddle/fluid/framework/tensor_impl.h
paddle/fluid/framework/tensor_impl.h
+4
-1
paddle/fluid/platform/float16.h
paddle/fluid/platform/float16.h
+35
-8
paddle/fluid/platform/float16_test.cc
paddle/fluid/platform/float16_test.cc
+27
-5
paddle/fluid/platform/float16_test.cu
paddle/fluid/platform/float16_test.cu
+33
-0
未找到文件。
paddle/fluid/framework/tensor_impl.h
浏览文件 @
74e0eb72
...
...
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"
namespace
paddle
{
namespace
framework
{
...
...
@@ -52,7 +53,9 @@ struct SizeOfTypeFunctor<HEAD, TAIL...> {
};
// Returns the size in bytes of the tensor element type identified by `type`.
//
// SizeOfTypeFunctor probes the listed candidate types in order;
// platform::float16 is included so half-precision tensors report their
// element size (2 bytes) correctly.  The functor yields 0 for an
// unsupported type, which PADDLE_ENFORCE turns into a hard error.
//
// NOTE(review): the scraped diff contained both the pre-change functor
// declaration (without platform::float16) and the post-change one; keeping
// both would redefine `functor`, so only the post-change declaration is kept.
static inline size_t SizeOfType(std::type_index type) {
  SizeOfTypeFunctor<int, float, double, int16_t, int64_t, bool, size_t,
                    platform::float16>
      functor;
  size_t size = functor(type);
  PADDLE_ENFORCE(size != 0UL, "Cannot get size of type %s", type.name());
  return size;
}
...
...
paddle/fluid/platform/float16.h
浏览文件 @
74e0eb72
...
...
@@ -62,6 +62,7 @@ limitations under the License. */
#define PADDLE_ALIGN(x) __attribute__((aligned(x)))
namespace
paddle
{
namespace
platform
{
// Use PADDLE_ALIGNED(2) to ensure that each float16 will be allocated
// and aligned at least on a 2-byte boundary, which leads to efficient
...
...
@@ -71,11 +72,21 @@ struct PADDLE_ALIGN(2) float16 {
public:
uint16_t
x
;
// Constructors
HOSTDEVICE
inline
float16
()
:
x
(
0
)
{}
// The following defaulted special class member functions
// are added to make float16 pass the std::is_trivial test
HOSTDEVICE
inline
float16
()
=
default
;
HOSTDEVICE
inline
float16
(
const
float16
&
h
)
:
x
(
h
.
x
)
{}
HOSTDEVICE
inline
float16
(
const
float16
&
)
=
default
;
HOSTDEVICE
inline
float16
&
operator
=
(
const
float16
&
)
=
default
;
HOSTDEVICE
inline
float16
(
float16
&&
)
=
default
;
HOSTDEVICE
inline
float16
&
operator
=
(
float16
&&
)
=
default
;
HOSTDEVICE
inline
~
float16
()
=
default
;
// Constructors
#ifdef PADDLE_CUDA_FP16
HOSTDEVICE
inline
explicit
float16
(
const
half
&
h
)
{
#if CUDA_VERSION >= 9000
...
...
@@ -136,11 +147,6 @@ struct PADDLE_ALIGN(2) float16 {
HOSTDEVICE
inline
explicit
float16
(
const
T
&
val
)
:
x
(
float16
(
static_cast
<
float
>
(
val
)).
x
)
{}
HOSTDEVICE
inline
float16
&
operator
=
(
const
float16
&
rhs
)
{
x
=
rhs
.
x
;
return
*
this
;
}
// Assignment operators
#ifdef PADDLE_CUDA_FP16
HOSTDEVICE
inline
float16
&
operator
=
(
const
half
&
rhs
)
{
...
...
@@ -727,4 +733,25 @@ HOSTDEVICE inline bool operator>=(const float16& a, const float16& b) {
return
float
(
a
)
>=
float
(
b
);
}
#endif
}
// namespace platform
}
// namespace paddle
namespace std {
// Override std::is_pod::value for float16.
//
// Rationale: different compilers implement std::is_pod against different
// C++ standards.  Under C++11, float16 is plain old data because it is both
// trivial and standard-layout.  However, std::is_pod in the nvcc 8.0 host
// C++ compiler follows C++0x, which is stricter and rejects any class with
// a customized constructor.  Overriding is_pod here, following the C++11
// definition, lets .cu files compile successfully under nvcc.
template <>
struct is_pod<paddle::platform::float16> {
  static const bool value =
      is_trivial<paddle::platform::float16>::value &&
      is_standard_layout<paddle::platform::float16>::value;
};
}  // namespace std
paddle/fluid/platform/float16_test.cc
浏览文件 @
74e0eb72
...
...
@@ -10,10 +10,13 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/framework/init.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include <gtest/gtest.h>
namespace
paddle
{
namespace
platform
{
TEST
(
float16
,
conversion_cpu
)
{
// Explicit conversion from Eigen::half
...
...
@@ -54,13 +57,9 @@ TEST(float16, conversion_cpu) {
EXPECT_EQ
(
float16
(
true
).
x
,
0x3c00
);
EXPECT_EQ
(
float16
(
false
).
x
,
0x0000
);
// Default constructor
float16
v_def
;
EXPECT_EQ
(
v_def
.
x
,
0x0000
);
// Assignment operator
float16
v_assign
;
v_assign
=
v_def
;
v_assign
=
float16
(
0
)
;
EXPECT_EQ
(
v_assign
.
x
,
0x0000
);
v_assign
=
Eigen
::
half
(
1.0
f
);
EXPECT_EQ
(
v_assign
.
x
,
0x3c00
);
...
...
@@ -116,4 +115,27 @@ TEST(float16, comparison_cpu) {
EXPECT_FALSE
(
float16
(
-
0.0
f
)
>
float16
(
0.0
f
));
}
TEST(float16, lod_tensor_cpu) {
  framework::LoDTensor lod_tensor;

  // Known half-precision bit patterns: 1.0f -> 0x3c00, 0.5f -> 0x3800,
  // 0.33333f -> 0x3555, 0.0f -> 0x0000.
  std::vector<float16> input_data = {float16(1.0f), float16(0.5f),
                                     float16(0.33333f), float16(0.0f)};
  EXPECT_EQ(input_data[0].x, 0x3c00);
  EXPECT_EQ(input_data[1].x, 0x3800);
  EXPECT_EQ(input_data[2].x, 0x3555);
  EXPECT_EQ(input_data[3].x, 0x0000);

  // A 4x1 tensor with LoD {0, 2, 4}; mutable_data only works because
  // float16 is a supported (POD) element type.
  lod_tensor.Resize({4, 1});
  lod_tensor.set_lod(framework::LoD({{0, 2, 4}}));
  float16* data_ptr = lod_tensor.mutable_data<float16>(CPUPlace());

  EXPECT_NE(data_ptr, nullptr);
  EXPECT_EQ(input_data.size(), static_cast<size_t>(lod_tensor.numel()));
  for (size_t i = 0; i < input_data.size(); ++i) {
    data_ptr[i] = input_data[i];
    EXPECT_EQ(data_ptr[i].x, input_data[i].x);
  }
}
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/float16_test.cu
浏览文件 @
74e0eb72
...
...
@@ -13,6 +13,8 @@ limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/utils/Logging.h"
#define ARITHMETIC_KERNEL(op_type, sign) \
...
...
@@ -108,6 +110,7 @@ limitations under the License. */
#ifdef PADDLE_CUDA_FP16
namespace
paddle
{
namespace
platform
{
#if CUDA_VERSION < 9000
ARITHMETIC_KERNEL
(
Add
,
+
)
...
...
@@ -209,5 +212,35 @@ TEST(float16, conversion_on_gpu) {
EXPECT_EQ
(
v_assign
.
x
,
0x3c00
);
}
TEST(float16, lod_tensor_on_gpu) {
  framework::LoDTensor src_tensor;
  framework::LoDTensor gpu_tensor;
  framework::LoDTensor dst_tensor;

  // Fill a 2x2 CPU tensor via memcpy — valid because float16 is POD.
  float16* src_ptr = src_tensor.mutable_data<float16>(
      framework::make_ddim({2, 2}), CPUPlace());
  float16 arr[4] = {float16(1.0f), float16(0.5f), float16(0.33333f),
                    float16(0.0f)};
  memcpy(src_ptr, arr, 4 * sizeof(float16));

  // CPU LoDTensor to GPU LoDTensor
  CUDAPlace gpu_place(0);
  CUDADeviceContext gpu_ctx(gpu_place);
  framework::TensorCopy(src_tensor, gpu_place, gpu_ctx, &gpu_tensor);

  // GPU LoDTensor to CPU LoDTensor
  framework::TensorCopy(gpu_tensor, CPUPlace(), gpu_ctx, &dst_tensor);

  // Sync before comparing LoDTensors — TensorCopy is asynchronous on the
  // device context's stream.
  gpu_ctx.Wait();

  const float16* dst_ptr = dst_tensor.data<float16>();
  ASSERT_NE(src_ptr, dst_ptr);
  for (size_t i = 0; i < 4; ++i) {
    EXPECT_EQ(src_ptr[i].x, dst_ptr[i].x);
  }
}
}
// namespace platform
}
// namespace paddle
#endif // PADDLE_CUDA_FP16
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录