Unverified commit f82fa64a, authored by kexinzhao, committed by GitHub

Move float16 into fluid folder (#8394)

* move float16 into fluid

* fix include

* move to platform folder
Parent 432d2b5d
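Of the three commit-message steps, the include fix is mechanical: every consumer of the header is repointed from the old path to the new one, which is all the two test-file hunks below amount to:

-#include "paddle/math/float16.h"
+#include "paddle/fluid/platform/float16.h"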
paddle/fluid/platform/CMakeLists.txt

@@ -27,7 +27,7 @@
 ELSE()
   set(MKLDNN_CTX_DEPS)
 ENDIF()
-# memcpy deoends on device_context, here add deps individually for
+# memcpy depends on device_context, here add deps individually for
 # avoiding cycle dependencies
 cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator
     system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS} ${MKLDNN_CTX_DEPS})
@@ -39,3 +39,6 @@
 nv_test(nccl_test SRCS nccl_test.cu DEPS dynload_cuda gpu_info device_context)
 cc_library(profiler SRCS profiler.cc DEPS device_context)
 cc_test(profiler_test SRCS profiler_test.cc DEPS profiler)
+
+nv_test(float16_gpu_test SRCS float16_test.cu)
+cc_test(float16_test SRCS float16_test.cc)
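Roughly speaking, cc_test builds a plain C++ gtest binary and nv_test compiles its source with NVCC for the GPU path, both registering the target with ctest (these helpers live in Paddle's cmake/generic.cmake). As a hypothetical sketch of the kind of check the newly registered float16_test.cc runs (test name and assertions here are illustrative, not copied from the file):

// Hypothetical sketch -- not the actual contents of float16_test.cc.
#include "paddle/fluid/platform/float16.h"

#include <gtest/gtest.h>

namespace paddle {

TEST(float16, conversion_cpu) {
  // 1.0f packs to the IEEE-754 binary16 bit pattern 0x3c00.
  EXPECT_EQ(float16(1.0f).x, 0x3c00);
  // Round-tripping through the 16-bit representation is exact for 0.5f.
  EXPECT_EQ(static_cast<float>(float16(0.5f)), 0.5f);
}

}  // namespace paddle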
paddle/math/float16.h → paddle/fluid/platform/float16.h

@@ -68,7 +68,7 @@ namespace paddle {
 // memory access of float16 struct and also makes float16 compatible
 // with CUDA half, ARM float16_t, and Eigen::half data types.
 struct PADDLE_ALIGN(2) float16 {
-public:
+ public:
   uint16_t x;
 
   // Constructors
@@ -319,7 +319,7 @@ public:
     return static_cast<double>(float(*this));
   }
 
-private:
+ private:
   union Bits {
     float f;
     int32_t si;
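For orientation: the struct stores the raw bits in a single uint16_t x and converts through float, as the double conversion above shows. A minimal usage sketch, assuming the constructors and conversion operators from the pre-move header carry over unchanged:

#include "paddle/fluid/platform/float16.h"

int main() {
  paddle::float16 h(3.5f);            // pack: 3.5 is exactly representable in fp16
  float f = static_cast<float>(h);    // unpack via the conversion operator
  double d = static_cast<double>(h);  // routed through float, per the hunk above
  return (f == 3.5f && d == 3.5) ? 0 : 1;
}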
@@ -485,8 +485,7 @@ HOST inline float16 operator+(const float16& a, const float16& b) {
       "st1 {v0.h}[0], [%[res_ptr]]\n"
       :  // outputs
       :  // inputs
-      [a_ptr] "r"(&(a.x)),
-      [b_ptr] "r"(&(b.x)),
+      [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
       [res_ptr] "r"(&(res.x))
       :  // clobbers
       "memory", "v0", "v1");
@@ -502,8 +501,7 @@ HOST inline float16 operator-(const float16& a, const float16& b) {
       "st1 {v0.h}[0], [%[res_ptr]]\n"
       :  // outputs
       :  // inputs
-      [a_ptr] "r"(&(a.x)),
-      [b_ptr] "r"(&(b.x)),
+      [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
       [res_ptr] "r"(&(res.x))
       :  // clobbers
       "memory", "v0", "v1");
@@ -519,8 +517,7 @@ HOST inline float16 operator*(const float16& a, const float16& b) {
       "st1 {v0.h}[0], [%[res_ptr]]\n"
       :  // outputs
       :  // inputs
-      [a_ptr] "r"(&(a.x)),
-      [b_ptr] "r"(&(b.x)),
+      [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
       [res_ptr] "r"(&(res.x))
       :  // clobbers
       "memory", "v0", "v1");
@@ -536,8 +533,7 @@ HOST inline float16 operator/(const float16& a, const float16& b) {
       "st1 {v0.h}[0], [%[res_ptr]]\n"
       :  // outputs
       :  // inputs
-      [a_ptr] "r"(&(a.x)),
-      [b_ptr] "r"(&(b.x)),
+      [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
       [res_ptr] "r"(&(res.x))
       :  // clobbers
       "memory", "v0", "v1");
@@ -588,8 +584,7 @@ HOST inline bool operator==(const float16& a, const float16& b) {
       "st1 {v0.h}[0], [%[res_ptr]]\n"
       :  // outputs
       :  // inputs
-      [a_ptr] "r"(&(a.x)),
-      [b_ptr] "r"(&(b.x)),
+      [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
       [res_ptr] "r"(&res)
       :  // clobbers
       "memory", "v0", "v1");
@@ -609,8 +604,7 @@ HOST inline bool operator<(const float16& a, const float16& b) {
       "st1 {v0.h}[0], [%[res_ptr]]\n"
       :  // outputs
       :  // inputs
-      [a_ptr] "r"(&(a.x)),
-      [b_ptr] "r"(&(b.x)),
+      [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
       [res_ptr] "r"(&res)
       :  // clobbers
       "memory", "v0", "v1");
@@ -626,8 +620,7 @@ HOST inline bool operator<=(const float16& a, const float16& b) {
       "st1 {v0.h}[0], [%[res_ptr]]\n"
       :  // outputs
       :  // inputs
-      [a_ptr] "r"(&(a.x)),
-      [b_ptr] "r"(&(b.x)),
+      [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
       [res_ptr] "r"(&res)
       :  // clobbers
       "memory", "v0", "v1");
@@ -643,8 +636,7 @@ HOST inline bool operator>(const float16& a, const float16& b) {
       "st1 {v0.h}[0], [%[res_ptr]]\n"
       :  // outputs
       :  // inputs
-      [a_ptr] "r"(&(a.x)),
-      [b_ptr] "r"(&(b.x)),
+      [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
       [res_ptr] "r"(&res)
       :  // clobbers
       "memory", "v0", "v1");
@@ -660,8 +652,7 @@ HOST inline bool operator>=(const float16& a, const float16& b) {
       "st1 {v0.h}[0], [%[res_ptr]]\n"
       :  // outputs
       :  // inputs
-      [a_ptr] "r"(&(a.x)),
-      [b_ptr] "r"(&(b.x)),
+      [a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
       [res_ptr] "r"(&res)
       :  // clobbers
       "memory", "v0", "v1");
paddle/math/tests/test_float16.cpp → paddle/fluid/platform/float16_test.cc

@@ -9,7 +9,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/math/float16.h"
+#include "paddle/fluid/platform/float16.h"
 
 #include <gtest/gtest.h>
 
paddle/math/tests/test_float16.cu → paddle/fluid/platform/float16_test.cu

@@ -9,7 +9,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/math/float16.h"
+#include "paddle/fluid/platform/float16.h"
 
 #include <gtest/gtest.h>
 
paddle/math/tests/CMakeLists.txt

@@ -22,7 +22,6 @@ if(WITH_GPU)
     link_paddle_test(test_Tensor)
     CUDA_ADD_EXECUTABLE(test_lazyAssign test_lazyAssign.cu)
     link_paddle_test(test_lazyAssign)
-    nv_test(test_float16_gpu SRCS test_float16.cu)
 else()
     compile_cu_as_cpp(test_Tensor.cu)
     add_unittest(test_Tensor test_Tensor.cu)
@@ -34,4 +33,3 @@ add_simple_unittest(test_FPException)
 add_simple_unittest(test_GpuProfiler)
 add_simple_unittest(test_BaseMatrix)
 add_simple_unittest(test_Matrix)
-add_simple_unittest(test_float16)