Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
s920243400
PaddleDetection
提交
74e0eb72
P
PaddleDetection
项目概览
s920243400
/
PaddleDetection
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleDetection
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
74e0eb72
编写于
2月 15, 2018
作者:
K
kexinzhao
提交者:
GitHub
2月 15, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
make float16 a pod type (#8456)
上级
74404fad
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
99 addition
and
14 deletion
+99
-14
paddle/fluid/framework/tensor_impl.h
paddle/fluid/framework/tensor_impl.h
+4
-1
paddle/fluid/platform/float16.h
paddle/fluid/platform/float16.h
+35
-8
paddle/fluid/platform/float16_test.cc
paddle/fluid/platform/float16_test.cc
+27
-5
paddle/fluid/platform/float16_test.cu
paddle/fluid/platform/float16_test.cu
+33
-0
未找到文件。
paddle/fluid/framework/tensor_impl.h
浏览文件 @
74e0eb72
...
...
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"
namespace
paddle
{
namespace
framework
{
...
...
@@ -52,7 +53,9 @@ struct SizeOfTypeFunctor<HEAD, TAIL...> {
};
static
inline
size_t
SizeOfType
(
std
::
type_index
type
)
{
SizeOfTypeFunctor
<
int
,
float
,
double
,
int16_t
,
int64_t
,
bool
,
size_t
>
functor
;
SizeOfTypeFunctor
<
int
,
float
,
double
,
int16_t
,
int64_t
,
bool
,
size_t
,
platform
::
float16
>
functor
;
size_t
size
=
functor
(
type
);
PADDLE_ENFORCE
(
size
!=
0UL
,
"Cannot get size of type %s"
,
type
.
name
());
return
size
;
...
...
paddle/fluid/platform/float16.h
浏览文件 @
74e0eb72
...
...
@@ -62,6 +62,7 @@ limitations under the License. */
#define PADDLE_ALIGN(x) __attribute__((aligned(x)))
namespace
paddle
{
namespace
platform
{
// Use PADDLE_ALIGN(2) to ensure that each float16 will be allocated
// and aligned at least on a 2-byte boundary, which leads to efficient
...
...
@@ -71,11 +72,21 @@ struct PADDLE_ALIGN(2) float16 {
public:
uint16_t
x
;
// Constructors
HOSTDEVICE
inline
float16
()
:
x
(
0
)
{}
// The following defaulted special class member functions
// are added to make float16 pass the std::is_trivial test
HOSTDEVICE
inline
float16
()
=
default
;
HOSTDEVICE
inline
float16
(
const
float16
&
h
)
:
x
(
h
.
x
)
{}
HOSTDEVICE
inline
float16
(
const
float16
&
)
=
default
;
HOSTDEVICE
inline
float16
&
operator
=
(
const
float16
&
)
=
default
;
HOSTDEVICE
inline
float16
(
float16
&&
)
=
default
;
HOSTDEVICE
inline
float16
&
operator
=
(
float16
&&
)
=
default
;
HOSTDEVICE
inline
~
float16
()
=
default
;
// Constructors
#ifdef PADDLE_CUDA_FP16
HOSTDEVICE
inline
explicit
float16
(
const
half
&
h
)
{
#if CUDA_VERSION >= 9000
...
...
@@ -136,11 +147,6 @@ struct PADDLE_ALIGN(2) float16 {
HOSTDEVICE
inline
explicit
float16
(
const
T
&
val
)
:
x
(
float16
(
static_cast
<
float
>
(
val
)).
x
)
{}
HOSTDEVICE
inline
float16
&
operator
=
(
const
float16
&
rhs
)
{
x
=
rhs
.
x
;
return
*
this
;
}
// Assignment operators
#ifdef PADDLE_CUDA_FP16
HOSTDEVICE
inline
float16
&
operator
=
(
const
half
&
rhs
)
{
...
...
@@ -727,4 +733,25 @@ HOSTDEVICE inline bool operator>=(const float16& a, const float16& b) {
return
float
(
a
)
>=
float
(
b
);
}
#endif
}
// namespace platform
}
// namespace paddle
namespace std {

// Specialization of std::is_pod for paddle::platform::float16.
//
// Why this exists: compilers implement std::is_pod against different
// revisions of the C++ standard. Under C++11, float16 is plain old data
// because it is both trivial and standard-layout. The host compiler used by
// nvcc 8.0 follows the stricter C++0x rule, under which a class that declares
// any custom constructor is not a POD. The trait is therefore defined here
// using the C++11 rule so that .cu files compile with nvcc.
//
// The result is exposed by deriving from integral_constant rather than by a
// hand-written `static const bool value`. This keeps `value` identical while
// also providing the `type` / `value_type` members callers expect from a
// standard type trait, and it relies on the definition of `value` supplied by
// the standard library, so the member can be odr-used (e.g. bound to a const
// reference) without a missing-definition link error.
template <>
struct is_pod<paddle::platform::float16>
    : integral_constant<
          bool, is_trivial<paddle::platform::float16>::value &&
                    is_standard_layout<paddle::platform::float16>::value> {};

}  // namespace std
paddle/fluid/platform/float16_test.cc
浏览文件 @
74e0eb72
...
...
@@ -10,10 +10,13 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/framework/init.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include <gtest/gtest.h>
namespace
paddle
{
namespace
platform
{
TEST
(
float16
,
conversion_cpu
)
{
// Explicit conversion from Eigen::half
...
...
@@ -54,13 +57,9 @@ TEST(float16, conversion_cpu) {
EXPECT_EQ
(
float16
(
true
).
x
,
0x3c00
);
EXPECT_EQ
(
float16
(
false
).
x
,
0x0000
);
// Default constructor
float16
v_def
;
EXPECT_EQ
(
v_def
.
x
,
0x0000
);
// Assignment operator
float16
v_assign
;
v_assign
=
v_def
;
v_assign
=
float16
(
0
)
;
EXPECT_EQ
(
v_assign
.
x
,
0x0000
);
v_assign
=
Eigen
::
half
(
1.0
f
);
EXPECT_EQ
(
v_assign
.
x
,
0x3c00
);
...
...
@@ -116,4 +115,27 @@ TEST(float16, comparison_cpu) {
EXPECT_FALSE
(
float16
(
-
0.0
f
)
>
float16
(
0.0
f
));
}
// Checks that a LoDTensor can hold float16 elements on the CPU: storage is
// requested through mutable_data<float16>(), and values written through the
// returned pointer are read back with the same 16-bit pattern.
TEST(float16, lod_tensor_cpu) {
  framework::LoDTensor lod_tensor;

  std::vector<float16> input_data = {float16(1.0f), float16(0.5f),
                                     float16(0.33333f), float16(0.0f)};
  // Sanity-check the raw bit patterns produced by the float -> float16
  // conversions before they are used as reference data.
  EXPECT_EQ(input_data[0].x, 0x3c00);
  EXPECT_EQ(input_data[1].x, 0x3800);
  EXPECT_EQ(input_data[2].x, 0x3555);
  EXPECT_EQ(input_data[3].x, 0x0000);

  lod_tensor.Resize({4, 1});
  lod_tensor.set_lod(framework::LoD({{0, 2, 4}}));

  float16* data_ptr = lod_tensor.mutable_data<float16>(CPUPlace());

  // These two checks are preconditions of the loop below, which dereferences
  // data_ptr and writes input_data.size() elements. They are fatal (ASSERT_*)
  // so that a failure stops the test instead of continuing into a null
  // dereference or an out-of-bounds write.
  ASSERT_NE(data_ptr, nullptr);
  ASSERT_EQ(input_data.size(), static_cast<size_t>(lod_tensor.numel()));

  for (size_t i = 0; i < input_data.size(); ++i) {
    data_ptr[i] = input_data[i];
    EXPECT_EQ(data_ptr[i].x, input_data[i].x);
  }
}
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/float16_test.cu
浏览文件 @
74e0eb72
...
...
@@ -13,6 +13,8 @@ limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/utils/Logging.h"
#define ARITHMETIC_KERNEL(op_type, sign) \
...
...
@@ -108,6 +110,7 @@ limitations under the License. */
#ifdef PADDLE_CUDA_FP16
namespace
paddle
{
namespace
platform
{
#if CUDA_VERSION < 9000
ARITHMETIC_KERNEL
(
Add
,
+
)
...
...
@@ -209,5 +212,35 @@ TEST(float16, conversion_on_gpu) {
EXPECT_EQ
(
v_assign
.
x
,
0x3c00
);
}
// Round-trips a 2x2 float16 LoDTensor CPU -> GPU -> CPU and verifies that the
// data arriving back on the host has the same bit patterns as the source.
TEST(float16, lod_tensor_on_gpu) {
  framework::LoDTensor src_tensor;
  framework::LoDTensor gpu_tensor;
  framework::LoDTensor dst_tensor;

  // Allocate the host-side source buffer and fill it with reference values.
  float16* src_ptr = src_tensor.mutable_data<float16>(
      framework::make_ddim({2, 2}), CPUPlace());
  float16 arr[4] = {float16(1.0f), float16(0.5f), float16(0.33333f),
                    float16(0.0f)};
  memcpy(src_ptr, arr, sizeof(arr));

  CUDAPlace gpu_place(0);
  CUDADeviceContext gpu_ctx(gpu_place);

  // Host -> device copy.
  framework::TensorCopy(src_tensor, gpu_place, gpu_ctx, &gpu_tensor);
  // Device -> host copy into a separate destination tensor.
  framework::TensorCopy(gpu_tensor, CPUPlace(), gpu_ctx, &dst_tensor);

  // Wait on the device context before reading the destination tensor.
  gpu_ctx.Wait();

  const float16* dst_ptr = dst_tensor.data<float16>();
  // The destination must be distinct storage, not an alias of the source.
  ASSERT_NE(src_ptr, dst_ptr);

  size_t idx = 0;
  while (idx < 4) {
    EXPECT_EQ(src_ptr[idx].x, dst_ptr[idx].x);
    ++idx;
  }
}
}
// namespace platform
}
// namespace paddle
#endif // PADDLE_CUDA_FP16
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录