未验证 提交 f82fa64a 编写于 作者: K kexinzhao 提交者: GitHub

Move float16 into fluid folder (#8394)

* move float16 into fluid

* fix include

* move to platform folder
上级 432d2b5d
......@@ -27,7 +27,7 @@ ELSE()
set(MKLDNN_CTX_DEPS)
ENDIF()
# memcpy deoends on device_context, here add deps individually for
# memcpy depends on device_context, here add deps individually for
# avoiding cycle dependencies
cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator
system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS} ${MKLDNN_CTX_DEPS})
......@@ -39,3 +39,6 @@ nv_test(nccl_test SRCS nccl_test.cu DEPS dynload_cuda gpu_info device_context)
cc_library(profiler SRCS profiler.cc DEPS device_context)
cc_test(profiler_test SRCS profiler_test.cc DEPS profiler)
nv_test(float16_gpu_test SRCS float16_test.cu)
cc_test(float16_test SRCS float16_test.cc)
......@@ -68,7 +68,7 @@ namespace paddle {
// memory access of float16 struct and also makes float16 compatible
// with CUDA half, ARM float16_t, and Eigen::half data types.
struct PADDLE_ALIGN(2) float16 {
public:
public:
uint16_t x;
// Constructors
......@@ -319,7 +319,7 @@ public:
return static_cast<double>(float(*this));
}
private:
private:
union Bits {
float f;
int32_t si;
......@@ -485,8 +485,7 @@ HOST inline float16 operator+(const float16& a, const float16& b) {
"st1 {v0.h}[0], [%[res_ptr]]\n"
: // outputs
: // inputs
[a_ptr] "r"(&(a.x)),
[b_ptr] "r"(&(b.x)),
[a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
[res_ptr] "r"(&(res.x))
: // clobbers
"memory", "v0", "v1");
......@@ -502,8 +501,7 @@ HOST inline float16 operator-(const float16& a, const float16& b) {
"st1 {v0.h}[0], [%[res_ptr]]\n"
: // outputs
: // inputs
[a_ptr] "r"(&(a.x)),
[b_ptr] "r"(&(b.x)),
[a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
[res_ptr] "r"(&(res.x))
: // clobbers
"memory", "v0", "v1");
......@@ -519,8 +517,7 @@ HOST inline float16 operator*(const float16& a, const float16& b) {
"st1 {v0.h}[0], [%[res_ptr]]\n"
: // outputs
: // inputs
[a_ptr] "r"(&(a.x)),
[b_ptr] "r"(&(b.x)),
[a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
[res_ptr] "r"(&(res.x))
: // clobbers
"memory", "v0", "v1");
......@@ -536,8 +533,7 @@ HOST inline float16 operator/(const float16& a, const float16& b) {
"st1 {v0.h}[0], [%[res_ptr]]\n"
: // outputs
: // inputs
[a_ptr] "r"(&(a.x)),
[b_ptr] "r"(&(b.x)),
[a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
[res_ptr] "r"(&(res.x))
: // clobbers
"memory", "v0", "v1");
......@@ -588,8 +584,7 @@ HOST inline bool operator==(const float16& a, const float16& b) {
"st1 {v0.h}[0], [%[res_ptr]]\n"
: // outputs
: // inputs
[a_ptr] "r"(&(a.x)),
[b_ptr] "r"(&(b.x)),
[a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
[res_ptr] "r"(&res)
: // clobbers
"memory", "v0", "v1");
......@@ -609,8 +604,7 @@ HOST inline bool operator<(const float16& a, const float16& b) {
"st1 {v0.h}[0], [%[res_ptr]]\n"
: // outputs
: // inputs
[a_ptr] "r"(&(a.x)),
[b_ptr] "r"(&(b.x)),
[a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
[res_ptr] "r"(&res)
: // clobbers
"memory", "v0", "v1");
......@@ -626,8 +620,7 @@ HOST inline bool operator<=(const float16& a, const float16& b) {
"st1 {v0.h}[0], [%[res_ptr]]\n"
: // outputs
: // inputs
[a_ptr] "r"(&(a.x)),
[b_ptr] "r"(&(b.x)),
[a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
[res_ptr] "r"(&res)
: // clobbers
"memory", "v0", "v1");
......@@ -643,8 +636,7 @@ HOST inline bool operator>(const float16& a, const float16& b) {
"st1 {v0.h}[0], [%[res_ptr]]\n"
: // outputs
: // inputs
[a_ptr] "r"(&(a.x)),
[b_ptr] "r"(&(b.x)),
[a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
[res_ptr] "r"(&res)
: // clobbers
"memory", "v0", "v1");
......@@ -660,8 +652,7 @@ HOST inline bool operator>=(const float16& a, const float16& b) {
"st1 {v0.h}[0], [%[res_ptr]]\n"
: // outputs
: // inputs
[a_ptr] "r"(&(a.x)),
[b_ptr] "r"(&(b.x)),
[a_ptr] "r"(&(a.x)), [b_ptr] "r"(&(b.x)),
[res_ptr] "r"(&res)
: // clobbers
"memory", "v0", "v1");
......
......@@ -9,7 +9,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/math/float16.h"
#include "paddle/fluid/platform/float16.h"
#include <gtest/gtest.h>
......
......@@ -9,7 +9,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/math/float16.h"
#include "paddle/fluid/platform/float16.h"
#include <gtest/gtest.h>
......
......@@ -22,7 +22,6 @@ if(WITH_GPU)
link_paddle_test(test_Tensor)
CUDA_ADD_EXECUTABLE(test_lazyAssign test_lazyAssign.cu)
link_paddle_test(test_lazyAssign)
nv_test(test_float16_gpu SRCS test_float16.cu)
else()
compile_cu_as_cpp(test_Tensor.cu)
add_unittest(test_Tensor test_Tensor.cu)
......@@ -34,4 +33,3 @@ add_simple_unittest(test_FPException)
add_simple_unittest(test_GpuProfiler)
add_simple_unittest(test_BaseMatrix)
add_simple_unittest(test_Matrix)
add_simple_unittest(test_float16)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册