Unverified commit 74e0eb72, authored by kexinzhao and committed by GitHub

make float16 a pod type (#8456)

Parent: 74404fad
paddle/fluid/framework/tensor_impl.h

@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/float16.h"

 namespace paddle {
 namespace framework {
@@ -52,7 +53,9 @@ struct SizeOfTypeFunctor<HEAD, TAIL...> {
 };

 static inline size_t SizeOfType(std::type_index type) {
-  SizeOfTypeFunctor<int, float, double, int16_t, int64_t, bool, size_t> functor;
+  SizeOfTypeFunctor<int, float, double, int16_t, int64_t, bool, size_t,
+                    platform::float16>
+      functor;
   size_t size = functor(type);
   PADDLE_ENFORCE(size != 0UL, "Cannot get size of type %s", type.name());
   return size;
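For context, SizeOfType resolves a std::type_index at runtime by peeling one candidate type off the template pack per step. A minimal sketch of that dispatch pattern (simplified; the functor in the framework header may differ in detail):

#include <cstddef>
#include <typeindex>

template <typename... T>
struct SizeOfTypeFunctor;

// Base case: the pack is exhausted, no candidate matched.
template <>
struct SizeOfTypeFunctor<> {
  std::size_t operator()(std::type_index) const { return 0UL; }
};

// Recursive case: report sizeof(HEAD) on a typeid match,
// otherwise try the remaining candidate types.
template <typename HEAD, typename... TAIL>
struct SizeOfTypeFunctor<HEAD, TAIL...> {
  std::size_t operator()(std::type_index type) const {
    if (std::type_index(typeid(HEAD)) == type) return sizeof(HEAD);
    return SizeOfTypeFunctor<TAIL...>()(type);
  }
};

Appending platform::float16 to the candidate list is what lets SizeOfType answer for float16 tensors instead of tripping the PADDLE_ENFORCE that follows it.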
paddle/fluid/platform/float16.h

@@ -62,6 +62,7 @@ limitations under the License. */
 #define PADDLE_ALIGN(x) __attribute__((aligned(x)))

 namespace paddle {
+namespace platform {

 // Use PADDLE_ALIGN(2) to ensure that each float16 will be allocated
 // and aligned at least on a 2-byte boundary, which leads to efficient
@@ -71,11 +72,21 @@ struct PADDLE_ALIGN(2) float16 {
  public:
   uint16_t x;

-  // Constructors
-  HOSTDEVICE inline float16() : x(0) {}
-
-  HOSTDEVICE inline float16(const float16& h) : x(h.x) {}
+  // The following defaulted special class member functions
+  // are added to make float16 pass the std::is_trivial test
+  HOSTDEVICE inline float16() = default;

+  HOSTDEVICE inline float16(const float16&) = default;

+  HOSTDEVICE inline float16& operator=(const float16&) = default;

+  HOSTDEVICE inline float16(float16&&) = default;

+  HOSTDEVICE inline float16& operator=(float16&&) = default;

+  HOSTDEVICE inline ~float16() = default;

+  // Constructors
 #ifdef PADDLE_CUDA_FP16
   HOSTDEVICE inline explicit float16(const half& h) {
 #if CUDA_VERSION >= 9000
@@ -136,11 +147,6 @@ struct PADDLE_ALIGN(2) float16 {
   HOSTDEVICE inline explicit float16(const T& val)
       : x(float16(static_cast<float>(val)).x) {}

-  HOSTDEVICE inline float16& operator=(const float16& rhs) {
-    x = rhs.x;
-    return *this;
-  }
-
   // Assignment operators
 #ifdef PADDLE_CUDA_FP16
   HOSTDEVICE inline float16& operator=(const half& rhs) {
@@ -727,4 +733,25 @@ HOSTDEVICE inline bool operator>=(const float16& a, const float16& b) {
   return float(a) >= float(b);
 }
 #endif

+}  // namespace platform
 }  // namespace paddle
+
+namespace std {
+// Override the std::is_pod::value for float16
+// The reason is that different compilers implemented std::is_pod based on
+// different C++ standards. float16 class is a plain old data in C++11 given
+// that it is both trivial and standard_layout.
+// However, std::is_pod in nvcc 8.0 host c++ compiler follows C++0x and is
+// more restricted in that you cannot provide any customized
+// constructor in float16. Hence, we override is_pod here following C++11
+// so that .cu files can be successfully compiled by nvcc.
+template <>
+struct is_pod<paddle::platform::float16> {
+  static const bool value =
+      is_trivial<paddle::platform::float16>::value &&
+      is_standard_layout<paddle::platform::float16>::value;
+};
+}  // namespace std
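As an illustrative check (not part of the commit), the combination of all-defaulted special member functions and a single uint16_t data member is exactly what the two C++11 traits named in the specialization test for, so the override evaluates to true:

#include <type_traits>
#include "paddle/fluid/platform/float16.h"

using paddle::platform::float16;

// C++11 POD = trivial (defaulted special members, no virtuals) plus
// standard layout (single access control, no reference members, etc.).
static_assert(std::is_trivial<float16>::value, "float16 must be trivial");
static_assert(std::is_standard_layout<float16>::value,
              "float16 must be standard layout");
static_assert(std::is_pod<float16>::value, "float16 is a POD in the C++11 sense");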
paddle/fluid/platform/float16_test.cc

@@ -10,10 +10,13 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/platform/float16.h"
+#include "paddle/fluid/framework/init.h"
+#include "paddle/fluid/framework/lod_tensor.h"
 #include <gtest/gtest.h>

 namespace paddle {
+namespace platform {

 TEST(float16, conversion_cpu) {
   // Explicit conversion from Eigen::half
@@ -54,13 +57,9 @@ TEST(float16, conversion_cpu) {
   EXPECT_EQ(float16(true).x, 0x3c00);
   EXPECT_EQ(float16(false).x, 0x0000);

-  // Default constructor
-  float16 v_def;
-  EXPECT_EQ(v_def.x, 0x0000);
-
   // Assignment operator
   float16 v_assign;
-  v_assign = v_def;
+  v_assign = float16(0);
   EXPECT_EQ(v_assign.x, 0x0000);
   v_assign = Eigen::half(1.0f);
   EXPECT_EQ(v_assign.x, 0x3c00);
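The deleted "Default constructor" check is not incidental: with float16() defaulted, a default-constructed value no longer zero-initializes x. A small sketch of the distinction (hypothetical usage, not from the commit):

#include "paddle/fluid/platform/float16.h"

using paddle::platform::float16;

int main() {
  float16 v_def;     // trivial default ctor: v_def.x is now indeterminate
  float16 v_zero{};  // value-initialization still yields x == 0x0000
  float16 v_assign;
  v_assign = float16(0);  // the updated test seeds the value explicitly
  return 0;
}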
@@ -116,4 +115,27 @@ TEST(float16, comparison_cpu) {
   EXPECT_FALSE(float16(-0.0f) > float16(0.0f));
 }

+TEST(float16, lod_tensor_cpu) {
+  framework::LoDTensor lod_tensor;
+
+  std::vector<float16> input_data = {float16(1.0f), float16(0.5f),
+                                     float16(0.33333f), float16(0.0f)};
+  EXPECT_EQ(input_data[0].x, 0x3c00);
+  EXPECT_EQ(input_data[1].x, 0x3800);
+  EXPECT_EQ(input_data[2].x, 0x3555);
+  EXPECT_EQ(input_data[3].x, 0x0000);
+
+  lod_tensor.Resize({4, 1});
+  lod_tensor.set_lod(framework::LoD({{0, 2, 4}}));
+  float16* data_ptr = lod_tensor.mutable_data<float16>(CPUPlace());
+
+  EXPECT_NE(data_ptr, nullptr);
+  EXPECT_EQ(input_data.size(), static_cast<size_t>(lod_tensor.numel()));
+  for (size_t i = 0; i < input_data.size(); ++i) {
+    data_ptr[i] = input_data[i];
+    EXPECT_EQ(data_ptr[i].x, input_data[i].x);
+  }
+}
+
+}  // namespace platform
 }  // namespace paddle
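The expected bit patterns in lod_tensor_cpu follow directly from the IEEE 754 binary16 layout: 1 sign bit, 5 exponent bits with bias 15, and 10 mantissa bits. A small sketch assembling those constants by hand (illustrative, not from the commit):

#include <cstdint>
#include <cstdio>

// Pack sign/exponent/mantissa fields into a binary16 bit pattern.
uint16_t make_half(uint16_t sign, uint16_t biased_exp, uint16_t mantissa) {
  return static_cast<uint16_t>((sign << 15) | (biased_exp << 10) | mantissa);
}

int main() {
  std::printf("%#06x\n", make_half(0, 15, 0));      // 1.0 = 1.0 * 2^0   -> 0x3c00
  std::printf("%#06x\n", make_half(0, 14, 0));      // 0.5 = 1.0 * 2^-1  -> 0x3800
  std::printf("%#06x\n", make_half(0, 13, 0x155));  // ~1/3 = 1.0101...b * 2^-2 -> 0x3555
  return 0;
}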
paddle/fluid/platform/float16_test.cu

@@ -13,6 +13,8 @@ limitations under the License. */
 #include <gtest/gtest.h>

+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/utils/Logging.h"

 #define ARITHMETIC_KERNEL(op_type, sign) \
@@ -108,6 +110,7 @@ limitations under the License. */
 #ifdef PADDLE_CUDA_FP16
 namespace paddle {
+namespace platform {

 #if CUDA_VERSION < 9000
 ARITHMETIC_KERNEL(Add, +)
@@ -209,5 +212,35 @@ TEST(float16, conversion_on_gpu) {
   EXPECT_EQ(v_assign.x, 0x3c00);
 }

+TEST(float16, lod_tensor_on_gpu) {
+  framework::LoDTensor src_tensor;
+  framework::LoDTensor gpu_tensor;
+  framework::LoDTensor dst_tensor;
+
+  float16* src_ptr = src_tensor.mutable_data<float16>(
+      framework::make_ddim({2, 2}), CPUPlace());
+
+  float16 arr[4] = {float16(1.0f), float16(0.5f), float16(0.33333f),
+                    float16(0.0f)};
+  memcpy(src_ptr, arr, 4 * sizeof(float16));
+
+  // CPU LoDTensor to GPU LoDTensor
+  CUDAPlace gpu_place(0);
+  CUDADeviceContext gpu_ctx(gpu_place);
+  framework::TensorCopy(src_tensor, gpu_place, gpu_ctx, &gpu_tensor);
+
+  // GPU LoDTensor to CPU LoDTensor
+  framework::TensorCopy(gpu_tensor, CPUPlace(), gpu_ctx, &dst_tensor);
+
+  // Sync before comparing LoDTensors
+  gpu_ctx.Wait();
+
+  const float16* dst_ptr = dst_tensor.data<float16>();
+  ASSERT_NE(src_ptr, dst_ptr);
+  for (size_t i = 0; i < 4; ++i) {
+    EXPECT_EQ(src_ptr[i].x, dst_ptr[i].x);
+  }
+}
+
+}  // namespace platform
 }  // namespace paddle

 #endif  // PADDLE_CUDA_FP16