Unverified commit f1143f0c authored by Aurelius84, committed by GitHub

[Pten] Migrate bfloat16/float16/complex from paddle::platform into pten::common (#39044)

* Migrate bfloat16/float16/complex from platform into pten::common

* fix typo

* fix code style
Parent 655f76d2
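The hunks below move the bfloat16/complex implementations under paddle/pten/common/ while keeping the old paddle::platform names alive as aliases. As a minimal before/after sketch of what callers see (the include paths are taken from the diff; the rest is illustrative):

```cpp
// Before: #include "paddle/fluid/platform/bfloat16.h"
//         paddle::platform::bfloat16 v(1.0f);
// After: the implementation lives in pten::dtype, and paddle::platform
// keeps a compatibility alias (see the shim hunks further down).
#include "paddle/pten/common/bfloat16.h"

int main() {
  pten::dtype::bfloat16 v(1.0f);                 // new canonical spelling
  return static_cast<float>(v) == 1.0f ? 0 : 1;  // 1.0 round-trips exactly
}
```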
......@@ -228,13 +228,7 @@ copy(inference_lib_dist
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/api/)
copy(inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/pten/common/*.h
${PADDLE_SOURCE_DIR}/paddle/fluid/platform/bfloat16.h
${PADDLE_SOURCE_DIR}/paddle/fluid/platform/complex.h
${PADDLE_SOURCE_DIR}/paddle/fluid/platform/float16.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/common/
${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/common/
${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/common/
${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/common/)
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/common/)
copy(inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/any.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/utils/)
......
# Adapt to the custom op mechanism: include the data-type-related header files
# to avoid exposing the paths of the underlying files; remove this after
# float16.h/complex.h/bfloat16.h are moved into pten
include_directories(${PADDLE_SOURCE_DIR}/paddle/fluid/platform)
add_subdirectory(memory)
add_subdirectory(platform)
add_subdirectory(distributed)
......
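The include_directories line above is a stopgap for the custom-op mechanism: with paddle/fluid/platform on the include path, extension code can pull in the data-type headers without spelling out the fluid-internal path. A hedged illustration of that reading (the short include form is my interpretation of the comment, not something shown in this diff):

```cpp
// Hypothetical custom-op translation unit. Because CMake put
// paddle/fluid/platform on the include path, the short form resolves
// to paddle/fluid/platform/bfloat16.h without exposing that path:
#include "bfloat16.h"

paddle::platform::bfloat16 half(0.5f);
```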
......@@ -24,17 +24,6 @@ limitations under the License. */
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"
namespace paddle {
namespace platform {
struct bfloat16;
template <typename T>
struct complex;
struct float16;
template <typename T>
struct complex;
} // namespace platform
} // namespace paddle
namespace paddle {
namespace framework {
......
......@@ -18,13 +18,6 @@
#include "gtest/gtest.h"
#include "paddle/fluid/framework/tensor.h"
namespace paddle {
namespace platform {
struct bfloat16;
struct float16;
} // namespace platform
} // namespace paddle
TEST(DataType, float16) {
using paddle::framework::Tensor;
using paddle::platform::CPUPlace;
......
......@@ -14,13 +14,6 @@
#include "paddle/fluid/framework/dlpack_tensor.h"
#include "paddle/fluid/framework/data_type.h"
namespace paddle {
namespace platform {
struct bfloat16;
struct float16;
} // namespace platform
} // namespace paddle
namespace paddle {
namespace framework {
......
......@@ -16,12 +16,6 @@
#include <glog/logging.h>
#include <gtest/gtest.h>
namespace paddle {
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace paddle {
namespace framework {
......
......@@ -17,12 +17,6 @@
#include <gtest/gtest.h>
#include <string>
namespace paddle {
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace framework = paddle::framework;
namespace platform = paddle::platform;
......
......@@ -24,9 +24,6 @@ class Variable;
namespace imperative {
class OpBase;
} // namespace imperative
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace paddle {
......
......@@ -26,9 +26,6 @@ class Variable;
namespace imperative {
class OpBase;
} // namespace imperative
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace paddle {
......
......@@ -25,9 +25,6 @@ class Variable;
namespace imperative {
class OpBase;
} // namespace imperative
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace paddle {
......
......@@ -23,9 +23,6 @@ class EmptyGradOpMaker;
namespace imperative {
class OpBase;
} // namespace imperative
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace paddle {
......
......@@ -14,13 +14,6 @@ limitations under the License. */
#include "paddle/fluid/operators/collective/c_allreduce_op.h"
namespace paddle {
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
......
......@@ -17,7 +17,6 @@ limitations under the License. */
namespace paddle {
namespace platform {
struct ASCENDPlace;
struct float16;
} // namespace platform
} // namespace paddle
......
......@@ -14,13 +14,6 @@ limitations under the License. */
#include "paddle/fluid/operators/collective/c_allreduce_op.h"
namespace paddle {
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
......
......@@ -23,9 +23,6 @@ class EmptyGradOpMaker;
namespace imperative {
class OpBase;
} // namespace imperative
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace paddle {
......
......@@ -14,13 +14,6 @@ limitations under the License. */
#include "paddle/fluid/operators/collective/c_allreduce_op.h"
namespace paddle {
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
......
......@@ -17,7 +17,6 @@ limitations under the License. */
namespace paddle {
namespace platform {
struct ASCENDPlace;
struct float16;
} // namespace platform
} // namespace paddle
......
......@@ -14,13 +14,6 @@ limitations under the License. */
#include "paddle/fluid/operators/collective/c_allreduce_op.h"
namespace paddle {
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
......
......@@ -23,9 +23,6 @@ class EmptyGradOpMaker;
namespace imperative {
class OpBase;
} // namespace imperative
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace paddle {
......
......@@ -14,13 +14,6 @@ limitations under the License. */
#include "paddle/fluid/operators/collective/c_allreduce_op.h"
namespace paddle {
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
......
......@@ -17,7 +17,6 @@ limitations under the License. */
namespace paddle {
namespace platform {
struct ASCENDPlace;
struct float16;
} // namespace platform
} // namespace paddle
......
......@@ -14,13 +14,6 @@ limitations under the License. */
#include "paddle/fluid/operators/collective/c_allreduce_op.h"
namespace paddle {
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
......
......@@ -21,9 +21,6 @@ class OpDesc;
namespace imperative {
class OpBase;
} // namespace imperative
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace paddle {
......
......@@ -14,13 +14,6 @@ limitations under the License. */
#include "paddle/fluid/operators/collective/c_allreduce_op.h"
namespace paddle {
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
......
......@@ -17,7 +17,6 @@ limitations under the License. */
namespace paddle {
namespace platform {
struct ASCENDPlace;
struct float16;
} // namespace platform
} // namespace paddle
......
......@@ -14,13 +14,6 @@ limitations under the License. */
#include "paddle/fluid/operators/collective/c_allreduce_op.h"
namespace paddle {
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
......
......@@ -23,9 +23,6 @@ class EmptyGradOpMaker;
namespace imperative {
class OpBase;
} // namespace imperative
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace paddle {
......
......@@ -14,13 +14,6 @@ limitations under the License. */
#include "paddle/fluid/operators/collective/c_reduce_op.h"
namespace paddle {
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
......
......@@ -17,7 +17,6 @@ limitations under the License. */
namespace paddle {
namespace platform {
struct ASCENDPlace;
struct float16;
} // namespace platform
} // namespace paddle
......
......@@ -14,13 +14,6 @@ limitations under the License. */
#include "paddle/fluid/operators/collective/c_reduce_op.h"
namespace paddle {
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
......
......@@ -23,9 +23,6 @@ class EmptyGradOpMaker;
namespace imperative {
class OpBase;
} // namespace imperative
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace paddle {
......
......@@ -14,13 +14,6 @@ limitations under the License. */
#include "paddle/fluid/operators/collective/c_reduce_op.h"
namespace paddle {
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
......
......@@ -17,7 +17,6 @@ limitations under the License. */
namespace paddle {
namespace platform {
struct ASCENDPlace;
struct float16;
} // namespace platform
} // namespace paddle
......
......@@ -14,13 +14,6 @@ limitations under the License. */
#include "paddle/fluid/operators/collective/c_reduce_op.h"
namespace paddle {
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
......
......@@ -23,9 +23,6 @@ class EmptyGradOpMaker;
namespace imperative {
class OpBase;
} // namespace imperative
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace paddle {
......
......@@ -14,13 +14,6 @@ limitations under the License. */
#include "paddle/fluid/operators/collective/c_reduce_op.h"
namespace paddle {
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
......
......@@ -17,7 +17,6 @@ limitations under the License. */
namespace paddle {
namespace platform {
struct ASCENDPlace;
struct float16;
} // namespace platform
} // namespace paddle
......
......@@ -14,13 +14,6 @@ limitations under the License. */
#include "paddle/fluid/operators/collective/c_reduce_op.h"
namespace paddle {
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
......
......@@ -23,9 +23,6 @@ class EmptyGradOpMaker;
namespace imperative {
class OpBase;
} // namespace imperative
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace paddle {
......
......@@ -14,13 +14,6 @@ limitations under the License. */
#include "paddle/fluid/operators/collective/c_reduce_op.h"
namespace paddle {
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
......
......@@ -17,7 +17,6 @@ limitations under the License. */
namespace paddle {
namespace platform {
struct ASCENDPlace;
struct float16;
} // namespace platform
} // namespace paddle
......
......@@ -14,13 +14,6 @@ limitations under the License. */
#include "paddle/fluid/operators/collective/c_reduce_op.h"
namespace paddle {
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
......
......@@ -26,9 +26,6 @@ class EmptyGradOpMaker;
namespace imperative {
class OpBase;
} // namespace imperative
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace paddle {
......
......@@ -18,13 +18,6 @@ limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
namespace paddle {
namespace platform {
template <typename T>
struct complex;
} // namespace platform
} // namespace paddle
namespace paddle {
namespace framework {
class OpDesc;
......
......@@ -18,13 +18,6 @@ limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
namespace paddle {
namespace platform {
template <typename T>
struct complex;
} // namespace platform
} // namespace paddle
namespace paddle {
namespace framework {
class OpDesc;
......
......@@ -34,7 +34,6 @@ class OverflowKernel;
} // namespace operators
namespace platform {
class CPUDeviceContext;
struct float16;
} // namespace platform
} // namespace paddle
......
......@@ -16,6 +16,8 @@ limitations under the License. */
#ifdef PADDLE_WITH_ASCEND_CL
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
#endif
#include "paddle/pten/common/bfloat16.h"
#include "paddle/pten/common/float16.h"
namespace pten {
class DenseTensor;
......@@ -25,8 +27,6 @@ namespace paddle {
namespace framework {} // namespace framework
namespace platform {
class CPUDeviceContext;
struct bfloat16;
struct float16;
} // namespace platform
} // namespace paddle
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/math/prelu.h"
#include "paddle/fluid/platform/float16.h"
namespace paddle {
namespace operators {
......@@ -110,15 +111,15 @@ void PreluScalarDirectCUDAFunctor<T>::operator()(gpuStream_t stream,
}
template class PreluChannelWiseDirectCUDAFunctor<float>;
template class PreluChannelWiseDirectCUDAFunctor<paddle::platform::float16>;
template class PreluChannelWiseDirectCUDAFunctor<platform::float16>;
template class PreluChannelWiseDirectCUDAFunctor<double>;
template class PreluElementWiseDirectCUDAFunctor<float>;
template class PreluElementWiseDirectCUDAFunctor<paddle::platform::float16>;
template class PreluElementWiseDirectCUDAFunctor<platform::float16>;
template class PreluElementWiseDirectCUDAFunctor<double>;
template class PreluScalarDirectCUDAFunctor<float>;
template class PreluScalarDirectCUDAFunctor<paddle::platform::float16>;
template class PreluScalarDirectCUDAFunctor<platform::float16>;
template class PreluScalarDirectCUDAFunctor<double>;
} // namespace math
......
......@@ -23,9 +23,6 @@ class EmptyGradOpMaker;
namespace imperative {
class OpBase;
} // namespace imperative
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace paddle {
......
......@@ -23,9 +23,6 @@ class EmptyGradOpMaker;
namespace imperative {
class OpBase;
} // namespace imperative
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace paddle {
......
......@@ -26,9 +26,6 @@ class EmptyGradOpMaker;
namespace imperative {
class OpBase;
} // namespace imperative
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace paddle {
......
......@@ -30,9 +30,6 @@ class OpDesc;
namespace imperative {
class OpBase;
} // namespace imperative
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
namespace paddle {
......
......@@ -14,396 +14,11 @@
#pragma once
#include <stdint.h>
#include <cmath>
#include <cstring>
#include <iostream>
#include <limits>
#ifdef PADDLE_WITH_CUDA
#include <cuda.h>
#endif
#if defined(__CUDACC__) && CUDA_VERSION >= 11000
#define PADDLE_CUDA_BF16
#include <cuda_bf16.h>
#endif
#if !defined(_WIN32)
#define PADDLE_ALIGN(x) __attribute__((aligned(x)))
#else
#define PADDLE_ALIGN(x) __declspec(align(x))
#endif
#if (defined(__CUDACC__) || defined(__HIPCC__))
#define HOSTDEVICE __host__ __device__
#define DEVICE __device__
#define HOST __host__
#else
#define HOSTDEVICE
#define DEVICE
#define HOST
#endif
#include "paddle/pten/common/bfloat16.h"
namespace paddle {
namespace platform {
struct PADDLE_ALIGN(2) bfloat16 {
public:
uint16_t x;
// Constructors
bfloat16() = default;
bfloat16(const bfloat16& o) = default;
bfloat16& operator=(const bfloat16& o) = default;
bfloat16(bfloat16&& o) = default;
bfloat16& operator=(bfloat16&& o) = default;
~bfloat16() = default;
HOSTDEVICE inline explicit bfloat16(float val) {
#ifdef PADDLE_WITH_HIP
uint32_t res = 0;
uint32_t* tempRes;
// We should be using memcpy in order to respect the strict aliasing rule
// but it fails in the HIP environment.
tempRes = reinterpret_cast<uint32_t*>(&val);
res = *tempRes;
x = res >> 16;
#else
#if defined(PADDLE_CUDA_BF16)
__nv_bfloat16 tmp = __float2bfloat16(val);
x = *reinterpret_cast<uint16_t*>(&tmp);
#else
std::memcpy(&x, reinterpret_cast<char*>(&val) + 2, 2);
#endif
#endif
}
#if defined(PADDLE_CUDA_BF16)
HOSTDEVICE inline explicit bfloat16(const __nv_bfloat16& val) {
x = *reinterpret_cast<const unsigned short*>(&val);
}
#endif
template <class T>
HOSTDEVICE inline explicit bfloat16(const T& val)
: x(bfloat16(static_cast<float>(val)).x) {}
// Assignment operators
#if defined(PADDLE_CUDA_BF16)
HOSTDEVICE inline bfloat16& operator=(const __nv_bfloat16& val) {
x = *reinterpret_cast<const unsigned short*>(&val);
return *this;
}
#endif
HOSTDEVICE inline bfloat16& operator=(bool b) {
x = b ? 0x3f80 : 0;
return *this;
}
HOSTDEVICE inline bfloat16& operator=(int8_t val) {
x = bfloat16(val).x;
return *this;
}
HOSTDEVICE inline bfloat16& operator=(uint8_t val) {
x = bfloat16(val).x;
return *this;
}
HOSTDEVICE inline bfloat16& operator=(int16_t val) {
x = bfloat16(val).x;
return *this;
}
HOSTDEVICE inline bfloat16& operator=(uint16_t val) {
x = bfloat16(val).x;
return *this;
}
HOSTDEVICE inline bfloat16& operator=(int32_t val) {
x = bfloat16(val).x;
return *this;
}
HOSTDEVICE inline bfloat16& operator=(uint32_t val) {
x = bfloat16(val).x;
return *this;
}
HOSTDEVICE inline bfloat16& operator=(int64_t val) {
x = bfloat16(val).x;
return *this;
}
HOSTDEVICE inline bfloat16& operator=(uint64_t val) {
x = bfloat16(val).x;
return *this;
}
HOSTDEVICE inline bfloat16& operator=(float val) {
x = bfloat16(val).x;
return *this;
}
HOSTDEVICE inline bfloat16& operator=(double val) {
x = bfloat16(val).x;
return *this;
}
// Conversion operators
HOSTDEVICE inline explicit operator float() const {
#ifdef PADDLE_WITH_HIP
uint32_t res = 0;
// We should be using memcpy in order to respect the strict aliasing rule
// but it fails in the HIP environment.
uint16_t temp = x;
uint16_t* temp_ptr = reinterpret_cast<uint16_t*>(&temp);
res = *temp_ptr;
return res;
#else
#ifdef PADDLE_CUDA_BF16
return __bfloat162float(*reinterpret_cast<const __nv_bfloat16*>(&x));
#else
float val = 0.f;
uint16_t temp = x;
std::memcpy(reinterpret_cast<char*>(&val) + 2,
reinterpret_cast<char*>(&temp), 2);
return val;
#endif
#endif
}
#ifdef PADDLE_CUDA_BF16
HOSTDEVICE inline explicit operator __nv_bfloat16() const {
return *reinterpret_cast<const __nv_bfloat16*>(&x);
}
#endif
HOSTDEVICE inline explicit operator bool() const { return (x & 0x7fff) != 0; }
HOSTDEVICE inline explicit operator int8_t() const {
return static_cast<int8_t>(static_cast<float>(*this));
}
HOSTDEVICE inline explicit operator uint8_t() const {
return static_cast<uint8_t>(static_cast<float>(*this));
}
HOSTDEVICE inline explicit operator int16_t() const {
return static_cast<int16_t>(static_cast<float>(*this));
}
HOSTDEVICE inline explicit operator uint16_t() const {
return static_cast<uint16_t>(static_cast<float>(*this));
}
HOSTDEVICE inline explicit operator int32_t() const {
return static_cast<int32_t>(static_cast<float>(*this));
}
HOSTDEVICE inline explicit operator uint32_t() const {
return static_cast<uint32_t>(static_cast<float>(*this));
}
HOSTDEVICE inline explicit operator int64_t() const {
return static_cast<int64_t>(static_cast<float>(*this));
}
HOSTDEVICE inline explicit operator uint64_t() const {
return static_cast<uint64_t>(static_cast<float>(*this));
}
HOSTDEVICE inline explicit operator double() const {
return static_cast<double>(static_cast<float>(*this));
}
};
HOSTDEVICE inline bfloat16 operator+(const bfloat16& a, const bfloat16& b) {
return bfloat16(static_cast<float>(a) + static_cast<float>(b));
}
HOSTDEVICE inline bfloat16 operator-(const bfloat16& a, const bfloat16& b) {
return bfloat16(static_cast<float>(a) - static_cast<float>(b));
}
HOSTDEVICE inline bfloat16 operator*(const bfloat16& a, const bfloat16& b) {
return bfloat16(static_cast<float>(a) * static_cast<float>(b));
}
HOSTDEVICE inline bfloat16 operator/(const bfloat16& a, const bfloat16& b) {
return bfloat16(static_cast<float>(a) / static_cast<float>(b));
}
HOSTDEVICE inline bfloat16 operator-(const bfloat16& a) {
bfloat16 res;
res.x = a.x ^ 0x8000;
return res;
}
HOSTDEVICE inline bfloat16& operator+=(bfloat16& a, // NOLINT
const bfloat16& b) {
a = bfloat16(static_cast<float>(a) + static_cast<float>(b));
return a;
}
HOSTDEVICE inline bfloat16& operator-=(bfloat16& a, // NOLINT
const bfloat16& b) {
a = bfloat16(static_cast<float>(a) - static_cast<float>(b));
return a;
}
HOSTDEVICE inline bfloat16& operator*=(bfloat16& a, // NOLINT
const bfloat16& b) {
a = bfloat16(static_cast<float>(a) * static_cast<float>(b));
return a;
}
HOSTDEVICE inline bfloat16& operator/=(bfloat16& a, // NOLINT
const bfloat16& b) {
a = bfloat16(static_cast<float>(a) / static_cast<float>(b));
return a;
}
HOSTDEVICE inline bfloat16 raw_uint16_to_bfloat16(uint16_t a) {
bfloat16 res;
res.x = a;
return res;
}
// Comparison operators
HOSTDEVICE inline bool operator==(const bfloat16& a, const bfloat16& b) {
return static_cast<float>(a) == static_cast<float>(b);
}
HOSTDEVICE inline bool operator!=(const bfloat16& a, const bfloat16& b) {
return static_cast<float>(a) != static_cast<float>(b);
}
HOSTDEVICE inline bool operator<(const bfloat16& a, const bfloat16& b) {
return static_cast<float>(a) < static_cast<float>(b);
}
HOSTDEVICE inline bool operator<=(const bfloat16& a, const bfloat16& b) {
return static_cast<float>(a) <= static_cast<float>(b);
}
HOSTDEVICE inline bool operator>(const bfloat16& a, const bfloat16& b) {
return static_cast<float>(a) > static_cast<float>(b);
}
HOSTDEVICE inline bool operator>=(const bfloat16& a, const bfloat16& b) {
return static_cast<float>(a) >= static_cast<float>(b);
}
HOSTDEVICE inline bool(isnan)(const bfloat16& a) {
return (a.x & 0x7FFF) > 0x7F80;
}
HOSTDEVICE inline bool(isinf)(const bfloat16& a) {
return (a.x & 0x7F80) == 0x7F80;
}
HOSTDEVICE inline bool(isfinite)(const bfloat16& a) {
return !((isnan)(a)) && !((isinf)(a));
}
inline std::ostream& operator<<(std::ostream& os, const bfloat16& a) {
os << a.x;
return os;
}
using bfloat16 = pten::dtype::bfloat16;
using namespace pten::dtype; // NOLINT
} // namespace platform
} // namespace paddle
namespace std {
template <>
struct is_pod<paddle::platform::bfloat16> {
static const bool value =
is_trivial<paddle::platform::bfloat16>::value &&
is_standard_layout<paddle::platform::bfloat16>::value;
};
template <>
struct is_floating_point<paddle::platform::bfloat16>
: std::integral_constant<
bool, std::is_same<paddle::platform::bfloat16,
typename std::remove_cv<
paddle::platform::bfloat16>::type>::value> {};
template <>
struct is_signed<paddle::platform::bfloat16> {
static const bool value = true;
};
template <>
struct is_unsigned<paddle::platform::bfloat16> {
static const bool value = false;
};
inline bool isnan(const paddle::platform::bfloat16& a) {
return paddle::platform::isnan(a);
}
inline bool isinf(const paddle::platform::bfloat16& a) {
return paddle::platform::isinf(a);
}
template <>
struct numeric_limits<paddle::platform::bfloat16> {
static const bool is_specialized = true;
static const bool is_signed = true;
static const bool is_integer = false;
static const bool is_exact = false;
static const bool has_infinity = true;
static const bool has_quiet_NaN = true;
static const bool has_signaling_NaN = true;
static const float_denorm_style has_denorm = denorm_present;
static const bool has_denorm_loss = false;
static const std::float_round_style round_style = std::round_to_nearest;
static const bool is_iec559 = false;
static const bool is_bounded = false;
static const bool is_modulo = false;
static const int digits = 8;
static const int digits10 = 2;
static const int max_digits10 = 9;
static const int radix = 2;
static const int min_exponent = -125;
static const int min_exponent10 = -37;
static const int max_exponent = 128;
static const int max_exponent10 = 38;
static const bool traps = true;
static const bool tinyness_before = false;
static paddle::platform::bfloat16(min)() {
return paddle::platform::raw_uint16_to_bfloat16(0x007f);
}
static paddle::platform::bfloat16 lowest() {
return paddle::platform::raw_uint16_to_bfloat16(0xff7f);
}
static paddle::platform::bfloat16(max)() {
return paddle::platform::raw_uint16_to_bfloat16(0x7f7f);
}
static paddle::platform::bfloat16 epsilon() {
return paddle::platform::raw_uint16_to_bfloat16(0x3400);
}
static paddle::platform::bfloat16 round_error() {
return paddle::platform::bfloat16(0.5);
}
static paddle::platform::bfloat16 infinity() {
return paddle::platform::raw_uint16_to_bfloat16(0x7f80);
}
static paddle::platform::bfloat16 quiet_NaN() {
return paddle::platform::raw_uint16_to_bfloat16(0xffc1);
}
static paddle::platform::bfloat16 signaling_NaN() {
return paddle::platform::raw_uint16_to_bfloat16(0xff81);
}
static paddle::platform::bfloat16 denorm_min() {
return paddle::platform::raw_uint16_to_bfloat16(0x0001);
}
};
} // namespace std
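After this hunk, paddle/fluid/platform/bfloat16.h is reduced to a compatibility shim: the whole implementation above is deleted and only the `using` declarations remain. Note that an alias makes the old and new names the very same type, not a layout-compatible copy; a standalone check (assuming both headers are on the include path):

```cpp
#include <type_traits>
#include "paddle/fluid/platform/bfloat16.h"  // now just the alias shim

// No ABI or overload-resolution change for existing fluid code:
static_assert(std::is_same<paddle::platform::bfloat16,
                           pten::dtype::bfloat16>::value,
              "platform::bfloat16 is the pten type, not a copy of it");
```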
......@@ -14,536 +14,14 @@
#pragma once
#include <stdint.h>
#include <complex>
#include <cstring>
#include <iostream>
#include <limits>
#ifdef PADDLE_WITH_CUDA
#include <cuComplex.h>
#include <thrust/complex.h>
#endif // PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_HIP
#include <hip/hip_complex.h>
#include <thrust/complex.h> // NOLINT
#endif
#if !defined(_WIN32)
#define PADDLE_ALIGN(x) __attribute__((aligned(x)))
#else
#define PADDLE_ALIGN(x) __declspec(align(x))
#endif
#if (defined(__CUDACC__) || defined(__HIPCC__))
#define HOSTDEVICE __host__ __device__
#define DEVICE __device__
#define HOST __host__
#else
#define HOSTDEVICE
#define DEVICE
#define HOST
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// todo
#define PADDLE_WITH_CUDA_OR_HIP_COMPLEX
#endif
#include "paddle/pten/common/complex.h"
namespace paddle {
namespace platform {
template <typename T>
struct PADDLE_ALIGN(sizeof(T) * 2) complex {
public:
T real;
T imag;
using value_type = T;
complex() = default;
complex(const complex<T>& o) = default;
complex& operator=(const complex<T>& o) = default;
complex(complex<T>&& o) = default;
complex& operator=(complex<T>&& o) = default;
~complex() = default;
HOSTDEVICE complex(T real, T imag) : real(real), imag(imag) {}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
template <typename T1>
HOSTDEVICE inline explicit complex(const thrust::complex<T1>& c) {
real = c.real();
imag = c.imag();
}
template <typename T1>
HOSTDEVICE inline explicit operator thrust::complex<T1>() const {
return thrust::complex<T1>(real, imag);
}
#ifdef PADDLE_WITH_HIP
HOSTDEVICE inline explicit operator hipFloatComplex() const {
return make_hipFloatComplex(real, imag);
}
HOSTDEVICE inline explicit operator hipDoubleComplex() const {
return make_hipDoubleComplex(real, imag);
}
#else
HOSTDEVICE inline explicit operator cuFloatComplex() const {
return make_cuFloatComplex(real, imag);
}
HOSTDEVICE inline explicit operator cuDoubleComplex() const {
return make_cuDoubleComplex(real, imag);
}
#endif
#endif
template <typename T1,
typename std::enable_if<std::is_floating_point<T1>::value ||
std::is_integral<T1>::value,
int>::type = 0>
HOSTDEVICE complex(const T1& val) {
real = static_cast<T>(val);
imag = static_cast<T>(0.0);
}
template <typename T1 = T>
HOSTDEVICE explicit complex(
const std::enable_if_t<std::is_same<T1, float>::value, complex<double>>&
val) {
real = val.real;
imag = val.imag;
}
template <typename T1 = T>
HOSTDEVICE explicit complex(
const std::enable_if_t<std::is_same<T1, double>::value, complex<float>>&
val) {
real = val.real;
imag = val.imag;
}
template <typename T1>
HOSTDEVICE inline explicit operator std::complex<T1>() const {
return static_cast<std::complex<T1>>(std::complex<T>(real, imag));
}
template <typename T1>
HOSTDEVICE complex(const std::complex<T1>& val)
: real(val.real()), imag(val.imag()) {}
template <typename T1,
typename std::enable_if<std::is_floating_point<T1>::value ||
std::is_integral<T1>::value,
int>::type = 0>
HOSTDEVICE inline complex& operator=(const T1& val) {
real = static_cast<T>(val);
imag = static_cast<T>(0.0);
return *this;
}
HOSTDEVICE inline explicit operator bool() const {
return static_cast<bool>(this->real) || static_cast<bool>(this->imag);
}
HOSTDEVICE inline explicit operator int8_t() const {
return static_cast<int8_t>(this->real);
}
HOSTDEVICE inline explicit operator uint8_t() const {
return static_cast<uint8_t>(this->real);
}
HOSTDEVICE inline explicit operator int16_t() const {
return static_cast<int16_t>(this->real);
}
HOSTDEVICE inline explicit operator uint16_t() const {
return static_cast<uint16_t>(this->real);
}
HOSTDEVICE inline explicit operator int32_t() const {
return static_cast<int32_t>(this->real);
}
HOSTDEVICE inline explicit operator uint32_t() const {
return static_cast<uint32_t>(this->real);
}
HOSTDEVICE inline explicit operator int64_t() const {
return static_cast<int64_t>(this->real);
}
HOSTDEVICE inline explicit operator uint64_t() const {
return static_cast<uint64_t>(this->real);
}
HOSTDEVICE inline explicit operator float() const {
return static_cast<float>(this->real);
}
HOSTDEVICE inline explicit operator double() const {
return static_cast<double>(this->real);
}
};
template <typename T>
HOSTDEVICE inline complex<T> operator+(const complex<T>& a,
const complex<T>& b) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return complex<T>(thrust::complex<T>(a) + thrust::complex<T>(b));
#else
return complex<T>(a.real + b.real, a.imag + b.imag);
#endif
}
template <typename T>
HOSTDEVICE inline complex<T> operator-(const complex<T>& a,
const complex<T>& b) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return complex<T>(thrust::complex<T>(a) - thrust::complex<T>(b));
#else
return complex<T>(a.real - b.real, a.imag - b.imag);
#endif
}
template <typename T>
HOSTDEVICE inline complex<T> operator*(const complex<T>& a,
const complex<T>& b) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return complex<T>(thrust::complex<T>(a) * thrust::complex<T>(b));
#else
return complex<T>(a.real * b.real - a.imag * b.imag,
a.imag * b.real + b.imag * a.real);
#endif
}
template <typename T>
HOSTDEVICE inline complex<T> operator/(const complex<T>& a,
const complex<T>& b) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return complex<T>(thrust::complex<T>(a) / thrust::complex<T>(b));
#else
T denominator = b.real * b.real + b.imag * b.imag;
return complex<T>((a.real * b.real + a.imag * b.imag) / denominator,
(a.imag * b.real - a.real * b.imag) / denominator);
#endif
}
template <typename T>
HOSTDEVICE inline complex<T> operator-(const complex<T>& a) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return complex<T>(-thrust::complex<T>(a.real, a.imag));
#else
complex<T> res;
res.real = -a.real;
res.imag = -a.imag;
return res;
#endif
}
template <typename T>
HOSTDEVICE inline complex<T>& operator+=(complex<T>& a, // NOLINT
const complex<T>& b) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
a = complex<T>(thrust::complex<T>(a.real, a.imag) +=
thrust::complex<T>(b.real, b.imag));
return a;
#else
a.real += b.real;
a.imag += b.imag;
return a;
#endif
}
template <typename T>
HOSTDEVICE inline complex<T>& operator-=(complex<T>& a, // NOLINT
const complex<T>& b) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
a = complex<T>(thrust::complex<T>(a.real, a.imag) -=
thrust::complex<T>(b.real, b.imag));
return a;
#else
a.real -= b.real;
a.imag -= b.imag;
return a;
#endif
}
template <typename T>
HOSTDEVICE inline complex<T>& operator*=(complex<T>& a, // NOLINT
const complex<T>& b) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
a = complex<T>(thrust::complex<T>(a.real, a.imag) *=
thrust::complex<T>(b.real, b.imag));
return a;
#else
a.real = a.real * b.real - a.imag * b.imag;
a.imag = a.imag * b.real + b.imag * a.real;
return a;
#endif
}
template <typename T>
HOSTDEVICE inline complex<T>& operator/=(complex<T>& a, // NOLINT
const complex<T>& b) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
a = complex<T>(thrust::complex<T>(a.real, a.imag) /=
thrust::complex<T>(b.real, b.imag));
return a;
#else
T denominator = b.real * b.real + b.imag * b.imag;
a.real = (a.real * b.real + a.imag * b.imag) / denominator;
a.imag = (a.imag * b.real - a.real * b.imag) / denominator;
return a;
#endif
}
template <typename T>
HOSTDEVICE inline complex<T> raw_uint16_to_complex64(uint16_t a) {
complex<T> res;
res.real = a;
res.imag = 0.0;
return res;
}
template <typename T>
using complex = pten::dtype::complex<T>;
template <typename T>
HOSTDEVICE inline bool operator==(const complex<T>& a, const complex<T>& b) {
return a.real == b.real && a.imag == b.imag;
}
template <typename T>
HOSTDEVICE inline bool operator!=(const complex<T>& a, const complex<T>& b) {
return a.real != b.real || a.imag != b.imag;
}
template <typename T>
HOSTDEVICE inline bool operator<(const complex<T>& a, const complex<T>& b) {
return a.real < b.real;
}
template <typename T>
HOSTDEVICE inline bool operator<=(const complex<T>& a, const complex<T>& b) {
return a.real <= b.real;
}
template <typename T>
HOSTDEVICE inline bool operator>(const complex<T>& a, const complex<T>& b) {
return a.real > b.real;
}
template <typename T>
HOSTDEVICE inline bool operator>=(const complex<T>& a, const complex<T>& b) {
return a.real >= b.real;
}
template <typename T>
HOSTDEVICE inline complex<T>(max)(const complex<T>& a, const complex<T>& b) {
return (a.real >= b.real) ? a : b;
}
template <typename T>
HOSTDEVICE inline complex<T>(min)(const complex<T>& a, const complex<T>& b) {
return (a.real < b.real) ? a : b;
}
template <typename T>
HOSTDEVICE inline bool(isnan)(const complex<T>& a) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return ::isnan(a.real) || ::isnan(a.imag);
#else
return std::isnan(a.real) || std::isnan(a.imag);
#endif
}
template <typename T>
HOSTDEVICE inline bool isinf(const complex<T>& a) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return ::isinf(a.real) || ::isinf(a.imag);
#else
return std::isinf(a.real) || std::isinf(a.imag);
#endif
}
template <typename T>
HOSTDEVICE inline bool isfinite(const complex<T>& a) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return ::isfinite(a.real) || ::isfinite(a.imag);
#else
return std::isfinite(a.real) || std::isfinite(a.imag);
#endif
}
template <typename T>
HOSTDEVICE inline T abs(const complex<T>& a) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return thrust::abs(thrust::complex<T>(a));
#else
return std::abs(std::complex<T>(a));
#endif
}
template <typename T>
HOSTDEVICE inline T arg(const complex<T>& a) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return thrust::arg(thrust::complex<T>(a));
#else
return std::arg(std::complex<T>(a));
#endif
}
template <typename T>
HOSTDEVICE inline complex<T> pow(const complex<T>& a, const complex<T>& b) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return complex<T>(thrust::pow(thrust::complex<T>(a), thrust::complex<T>(b)));
#else
return complex<T>(std::pow(std::complex<T>(a), std::complex<T>(b)));
#endif
}
template <typename T>
HOSTDEVICE inline complex<T> sqrt(const complex<T>& a) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return complex<T>(thrust::sqrt(thrust::complex<T>(a)));
#else
return complex<T>(std::sqrt(std::complex<T>(a)));
#endif
}
template <typename T>
HOSTDEVICE inline complex<T> tanh(const complex<T>& a) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return complex<T>(thrust::tanh(thrust::complex<T>(a)));
#else
return complex<T>(std::tanh(std::complex<T>(a)));
#endif
}
template <typename T>
HOSTDEVICE inline complex<T> log(const complex<T>& a) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return complex<T>(thrust::log(thrust::complex<T>(a)));
#else
return complex<T>(std::log(std::complex<T>(a)));
#endif
}
template <typename T>
inline std::ostream& operator<<(std::ostream& os, const complex<T>& a) {
os << "real:" << a.real << " imag:" << a.imag;
return os;
}
using namespace pten::dtype; // NOLINT
} // namespace platform
} // namespace paddle
namespace std {
template <typename T>
struct is_pod<paddle::platform::complex<T>> {
static const bool value = true;
};
template <typename T>
struct is_floating_point<paddle::platform::complex<T>>
: std::integral_constant<bool, false> {};
template <typename T>
struct is_signed<paddle::platform::complex<T>> {
static const bool value = false;
};
template <typename T>
struct is_unsigned<paddle::platform::complex<T>> {
static const bool value = false;
};
template <typename T>
inline bool isnan(const paddle::platform::complex<T>& a) {
return paddle::platform::isnan(a);
}
template <typename T>
inline bool isinf(const paddle::platform::complex<T>& a) {
return paddle::platform::isinf(a);
}
template <typename T>
struct numeric_limits<paddle::platform::complex<T>> {
static const bool is_specialized = false;
static const bool is_signed = false;
static const bool is_integer = false;
static const bool is_exact = false;
static const bool has_infinity = false;
static const bool has_quiet_NaN = false;
static const bool has_signaling_NaN = false;
static const float_denorm_style has_denorm = denorm_absent;
static const bool has_denorm_loss = false;
static const std::float_round_style round_style = std::round_toward_zero;
static const bool is_iec559 = false;
static const bool is_bounded = false;
static const bool is_modulo = false;
static const int digits = 0;
static const int digits10 = 0;
static const int max_digits10 = 0;
static const int radix = 0;
static const int min_exponent = 0;
static const int min_exponent10 = 0;
static const int max_exponent = 0;
static const int max_exponent10 = 0;
static const bool traps = false;
static const bool tinyness_before = false;
static paddle::platform::complex<T>(min)() {
return paddle::platform::complex<T>(0.0, 0.0);
}
static paddle::platform::complex<T> lowest() {
return paddle::platform::complex<T>(0.0, 0.0);
}
static paddle::platform::complex<T>(max)() {
return paddle::platform::complex<T>(0.0, 0.0);
}
static paddle::platform::complex<T> epsilon() {
return paddle::platform::complex<T>(0.0, 0.0);
}
static paddle::platform::complex<T> round_error() {
return paddle::platform::complex<T>(0.0, 0.0);
}
static paddle::platform::complex<T> infinity() {
return paddle::platform::complex<T>(0.0, 0.0);
}
static paddle::platform::complex<T> quiet_NaN() {
return paddle::platform::complex<T>(0.0, 0.0);
}
static paddle::platform::complex<T> signaling_NaN() {
return paddle::platform::complex<T>(0.0, 0.0);
}
static paddle::platform::complex<T> denorm_min() {
return paddle::platform::complex<T>(0.0, 0.0);
}
};
} // namespace std
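The complex shim is analogous, except the compatibility name must be an alias template since complex is parameterized on its element type. A sketch of the guarantee this gives callers (assuming both headers are on the include path):

```cpp
#include <type_traits>
#include "paddle/fluid/platform/complex.h"  // now just the alias shim

// An alias template instantiates to the very same specialization:
static_assert(std::is_same<paddle::platform::complex<float>,
                           pten::dtype::complex<float>>::value,
              "platform::complex<float> is pten::dtype::complex<float>");
```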
......@@ -23,12 +23,6 @@ limitations under the License. */
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/macros.h"
namespace paddle {
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
DECLARE_bool(cudnn_deterministic);
namespace paddle {
......
......@@ -27,12 +27,6 @@ limitations under the License. */
// MIOPEN does not have an epsilon definition
#define CUDNN_BN_MIN_EPSILON 1e-05
namespace paddle {
namespace platform {
struct float16;
} // namespace platform
} // namespace paddle
DECLARE_bool(cudnn_deterministic);
namespace paddle {
......
This diff is collapsed.
# Adapt to the custom op mechanism: include the data-type-related header files
# to avoid exposing the paths of the underlying files; remove this after
# float16.h/complex.h/bfloat16.h are moved into pten
include_directories(${PADDLE_SOURCE_DIR}/paddle/fluid/platform)
# paddle experimental common components
add_subdirectory(common)
......
......@@ -49,11 +49,11 @@ limitations under the License. */
*/
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/memory/memory.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/stream/cuda_stream.h"
#include "paddle/pten/common/complex.h"
#include "paddle/pten/common/float16.h"
namespace paddle {
namespace experimental {
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdint.h>
#include <cmath>
#include <cstring>
#include <iostream>
#include <limits>
#ifdef PADDLE_WITH_CUDA
#include <cuda.h>
#endif
#if defined(__CUDACC__) && CUDA_VERSION >= 11000
#define PADDLE_CUDA_BF16
#include <cuda_bf16.h>
#endif
#if !defined(_WIN32)
#define PADDLE_ALIGN(x) __attribute__((aligned(x)))
#else
#define PADDLE_ALIGN(x) __declspec(align(x))
#endif
#if (defined(__CUDACC__) || defined(__HIPCC__))
#define HOSTDEVICE __host__ __device__
#define DEVICE __device__
#define HOST __host__
#else
#define HOSTDEVICE
#define DEVICE
#define HOST
#endif
namespace pten {
namespace dtype {
struct PADDLE_ALIGN(2) bfloat16 {
public:
uint16_t x;
// Constructors
bfloat16() = default;
bfloat16(const bfloat16& o) = default;
bfloat16& operator=(const bfloat16& o) = default;
bfloat16(bfloat16&& o) = default;
bfloat16& operator=(bfloat16&& o) = default;
~bfloat16() = default;
HOSTDEVICE inline explicit bfloat16(float val) {
#ifdef PADDLE_WITH_HIP
uint32_t res = 0;
uint32_t* tempRes;
// We should be using memcpy in order to respect the strict aliasing rule
// but it fails in the HIP environment.
tempRes = reinterpret_cast<uint32_t*>(&val);
res = *tempRes;
x = res >> 16;
#else
#if defined(PADDLE_CUDA_BF16)
__nv_bfloat16 tmp = __float2bfloat16(val);
x = *reinterpret_cast<uint16_t*>(&tmp);
#else
std::memcpy(&x, reinterpret_cast<char*>(&val) + 2, 2);  // high 16 bits (little-endian)
#endif
#endif
}
#if defined(PADDLE_CUDA_BF16)
HOSTDEVICE inline explicit bfloat16(const __nv_bfloat16& val) {
x = *reinterpret_cast<const unsigned short*>(&val);
}
#endif
template <class T>
HOSTDEVICE inline explicit bfloat16(const T& val)
: x(bfloat16(static_cast<float>(val)).x) {}
// Assignment operators
#if defined(PADDLE_CUDA_BF16)
HOSTDEVICE inline bfloat16& operator=(const __nv_bfloat16& val) {
x = *reinterpret_cast<const unsigned short*>(&val);
return *this;
}
#endif
HOSTDEVICE inline bfloat16& operator=(bool b) {
x = b ? 0x3f80 : 0;  // 0x3f80 is the bfloat16 bit pattern of 1.0f
return *this;
}
HOSTDEVICE inline bfloat16& operator=(int8_t val) {
x = bfloat16(val).x;
return *this;
}
HOSTDEVICE inline bfloat16& operator=(uint8_t val) {
x = bfloat16(val).x;
return *this;
}
HOSTDEVICE inline bfloat16& operator=(int16_t val) {
x = bfloat16(val).x;
return *this;
}
HOSTDEVICE inline bfloat16& operator=(uint16_t val) {
x = bfloat16(val).x;
return *this;
}
HOSTDEVICE inline bfloat16& operator=(int32_t val) {
x = bfloat16(val).x;
return *this;
}
HOSTDEVICE inline bfloat16& operator=(uint32_t val) {
x = bfloat16(val).x;
return *this;
}
HOSTDEVICE inline bfloat16& operator=(int64_t val) {
x = bfloat16(val).x;
return *this;
}
HOSTDEVICE inline bfloat16& operator=(uint64_t val) {
x = bfloat16(val).x;
return *this;
}
HOSTDEVICE inline bfloat16& operator=(float val) {
x = bfloat16(val).x;
return *this;
}
HOSTDEVICE inline bfloat16& operator=(double val) {
x = bfloat16(val).x;
return *this;
}
// Conversion operators
HOSTDEVICE inline explicit operator float() const {
#ifdef PADDLE_WITH_HIP
uint32_t res = 0;
// We should be using memcpy in order to respect the strict aliasing rule
// but it fails in the HIP environment.
uint16_t temp = x;
uint16_t* temp_ptr = reinterpret_cast<uint16_t*>(&temp);
res = *temp_ptr;
return res;
#else
#ifdef PADDLE_CUDA_BF16
return __bfloat162float(*reinterpret_cast<const __nv_bfloat16*>(&x));
#else
float val = 0.f;
uint16_t temp = x;
std::memcpy(
reinterpret_cast<char*>(&val) + 2, reinterpret_cast<char*>(&temp), 2);
return val;
#endif
#endif
}
#ifdef PADDLE_CUDA_BF16
HOSTDEVICE inline explicit operator __nv_bfloat16() const {
return *reinterpret_cast<const __nv_bfloat16*>(&x);
}
#endif
HOSTDEVICE inline explicit operator bool() const { return (x & 0x7fff) != 0; }
HOSTDEVICE inline explicit operator int8_t() const {
return static_cast<int8_t>(static_cast<float>(*this));
}
HOSTDEVICE inline explicit operator uint8_t() const {
return static_cast<uint8_t>(static_cast<float>(*this));
}
HOSTDEVICE inline explicit operator int16_t() const {
return static_cast<int16_t>(static_cast<float>(*this));
}
HOSTDEVICE inline explicit operator uint16_t() const {
return static_cast<uint16_t>(static_cast<float>(*this));
}
HOSTDEVICE inline explicit operator int32_t() const {
return static_cast<int32_t>(static_cast<float>(*this));
}
HOSTDEVICE inline explicit operator uint32_t() const {
return static_cast<uint32_t>(static_cast<float>(*this));
}
HOSTDEVICE inline explicit operator int64_t() const {
return static_cast<int64_t>(static_cast<float>(*this));
}
HOSTDEVICE inline explicit operator uint64_t() const {
return static_cast<uint64_t>(static_cast<float>(*this));
}
HOSTDEVICE inline explicit operator double() const {
return static_cast<double>(static_cast<float>(*this));
}
};
HOSTDEVICE inline bfloat16 operator+(const bfloat16& a, const bfloat16& b) {
return bfloat16(static_cast<float>(a) + static_cast<float>(b));
}
HOSTDEVICE inline bfloat16 operator-(const bfloat16& a, const bfloat16& b) {
return bfloat16(static_cast<float>(a) - static_cast<float>(b));
}
HOSTDEVICE inline bfloat16 operator*(const bfloat16& a, const bfloat16& b) {
return bfloat16(static_cast<float>(a) * static_cast<float>(b));
}
HOSTDEVICE inline bfloat16 operator/(const bfloat16& a, const bfloat16& b) {
return bfloat16(static_cast<float>(a) / static_cast<float>(b));
}
HOSTDEVICE inline bfloat16 operator-(const bfloat16& a) {
bfloat16 res;
res.x = a.x ^ 0x8000;  // flip the IEEE-754 sign bit
return res;
}
HOSTDEVICE inline bfloat16& operator+=(bfloat16& a, // NOLINT
const bfloat16& b) {
a = bfloat16(static_cast<float>(a) + static_cast<float>(b));
return a;
}
HOSTDEVICE inline bfloat16& operator-=(bfloat16& a, // NOLINT
const bfloat16& b) {
a = bfloat16(static_cast<float>(a) - static_cast<float>(b));
return a;
}
HOSTDEVICE inline bfloat16& operator*=(bfloat16& a, // NOLINT
const bfloat16& b) {
a = bfloat16(static_cast<float>(a) * static_cast<float>(b));
return a;
}
HOSTDEVICE inline bfloat16& operator/=(bfloat16& a, // NOLINT
const bfloat16& b) {
a = bfloat16(static_cast<float>(a) / static_cast<float>(b));
return a;
}
HOSTDEVICE inline bfloat16 raw_uint16_to_bfloat16(uint16_t a) {
bfloat16 res;
res.x = a;
return res;
}
// Comparison operators
HOSTDEVICE inline bool operator==(const bfloat16& a, const bfloat16& b) {
return static_cast<float>(a) == static_cast<float>(b);
}
HOSTDEVICE inline bool operator!=(const bfloat16& a, const bfloat16& b) {
return static_cast<float>(a) != static_cast<float>(b);
}
HOSTDEVICE inline bool operator<(const bfloat16& a, const bfloat16& b) {
return static_cast<float>(a) < static_cast<float>(b);
}
HOSTDEVICE inline bool operator<=(const bfloat16& a, const bfloat16& b) {
return static_cast<float>(a) <= static_cast<float>(b);
}
HOSTDEVICE inline bool operator>(const bfloat16& a, const bfloat16& b) {
return static_cast<float>(a) > static_cast<float>(b);
}
HOSTDEVICE inline bool operator>=(const bfloat16& a, const bfloat16& b) {
return static_cast<float>(a) >= static_cast<float>(b);
}
HOSTDEVICE inline bool(isnan)(const bfloat16& a) {
return (a.x & 0x7FFF) > 0x7F80;  // exponent all ones, nonzero mantissa
}
HOSTDEVICE inline bool(isinf)(const bfloat16& a) {
return (a.x & 0x7F80) == 0x7F80;  // exponent field all ones
}
HOSTDEVICE inline bool(isfinite)(const bfloat16& a) {
return !((isnan)(a)) && !((isinf)(a));
}
inline std::ostream& operator<<(std::ostream& os, const bfloat16& a) {
os << a.x;
return os;
}
} // namespace dtype
} // namespace pten
namespace std {
template <>
struct is_pod<pten::dtype::bfloat16> {
static const bool value = is_trivial<pten::dtype::bfloat16>::value &&
is_standard_layout<pten::dtype::bfloat16>::value;
};
template <>
struct is_floating_point<pten::dtype::bfloat16>
: std::integral_constant<
bool,
std::is_same<
pten::dtype::bfloat16,
typename std::remove_cv<pten::dtype::bfloat16>::type>::value> {};
template <>
struct is_signed<pten::dtype::bfloat16> {
static const bool value = true;
};
template <>
struct is_unsigned<pten::dtype::bfloat16> {
static const bool value = false;
};
inline bool isnan(const pten::dtype::bfloat16& a) {
return pten::dtype::isnan(a);
}
inline bool isinf(const pten::dtype::bfloat16& a) {
return pten::dtype::isinf(a);
}
template <>
struct numeric_limits<pten::dtype::bfloat16> {
static const bool is_specialized = true;
static const bool is_signed = true;
static const bool is_integer = false;
static const bool is_exact = false;
static const bool has_infinity = true;
static const bool has_quiet_NaN = true;
static const bool has_signaling_NaN = true;
static const float_denorm_style has_denorm = denorm_present;
static const bool has_denorm_loss = false;
static const std::float_round_style round_style = std::round_to_nearest;
static const bool is_iec559 = false;
static const bool is_bounded = false;
static const bool is_modulo = false;
static const int digits = 8;
static const int digits10 = 2;
static const int max_digits10 = 9;
static const int radix = 2;
static const int min_exponent = -125;
static const int min_exponent10 = -37;
static const int max_exponent = 128;
static const int max_exponent10 = 38;
static const bool traps = true;
static const bool tinyness_before = false;
static pten::dtype::bfloat16(min)() {
return pten::dtype::raw_uint16_to_bfloat16(0x007f);
}
static pten::dtype::bfloat16 lowest() {
return pten::dtype::raw_uint16_to_bfloat16(0xff7f);
}
static pten::dtype::bfloat16(max)() {
return pten::dtype::raw_uint16_to_bfloat16(0x7f7f);
}
static pten::dtype::bfloat16 epsilon() {
return pten::dtype::raw_uint16_to_bfloat16(0x3400);
}
static pten::dtype::bfloat16 round_error() {
return pten::dtype::bfloat16(0.5);
}
static pten::dtype::bfloat16 infinity() {
return pten::dtype::raw_uint16_to_bfloat16(0x7f80);
}
static pten::dtype::bfloat16 quiet_NaN() {
return pten::dtype::raw_uint16_to_bfloat16(0xffc1);
}
static pten::dtype::bfloat16 signaling_NaN() {
return pten::dtype::raw_uint16_to_bfloat16(0xff81);
}
static pten::dtype::bfloat16 denorm_min() {
return pten::dtype::raw_uint16_to_bfloat16(0x0001);
}
};
} // namespace std
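For reference, a minimal usage sketch of the new header (the include path follows this diff; note that on the CPU fallback path construction truncates to the top 16 bits of the IEEE-754 float, and that operator<< streams the raw bit pattern rather than the numeric value):

```cpp
#include <iostream>
#include "paddle/pten/common/bfloat16.h"

int main() {
  pten::dtype::bfloat16 a(1.5f);   // 1.5 fits the 8-bit significand exactly
  pten::dtype::bfloat16 b(0.25f);  // so does 0.25
  pten::dtype::bfloat16 c = a + b; // arithmetic round-trips through float
  std::cout << static_cast<float>(c) << "\n";  // 1.75
  std::cout << c << "\n";  // prints c.x, the raw uint16_t representation
  return 0;
}
```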
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdint.h>
#include <complex>
#include <cstring>
#include <iostream>
#include <limits>
#ifdef PADDLE_WITH_CUDA
#include <cuComplex.h>
#include <thrust/complex.h>
#endif // PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_HIP
#include <hip/hip_complex.h>
#include <thrust/complex.h> // NOLINT
#endif
#if !defined(_WIN32)
#define PADDLE_ALIGN(x) __attribute__((aligned(x)))
#else
#define PADDLE_ALIGN(x) __declspec(align(x))
#endif
#if (defined(__CUDACC__) || defined(__HIPCC__))
#define HOSTDEVICE __host__ __device__
#define DEVICE __device__
#define HOST __host__
#else
#define HOSTDEVICE
#define DEVICE
#define HOST
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// todo
#define PADDLE_WITH_CUDA_OR_HIP_COMPLEX
#endif
namespace pten {
namespace dtype {
template <typename T>
struct PADDLE_ALIGN(sizeof(T) * 2) complex {
public:
T real;
T imag;
using value_type = T;
complex() = default;
complex(const complex<T>& o) = default;
complex& operator=(const complex<T>& o) = default;
complex(complex<T>&& o) = default;
complex& operator=(complex<T>&& o) = default;
~complex() = default;
HOSTDEVICE complex(T real, T imag) : real(real), imag(imag) {}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
template <typename T1>
HOSTDEVICE inline explicit complex(const thrust::complex<T1>& c) {
real = c.real();
imag = c.imag();
}
template <typename T1>
HOSTDEVICE inline explicit operator thrust::complex<T1>() const {
return thrust::complex<T1>(real, imag);
}
#ifdef PADDLE_WITH_HIP
HOSTDEVICE inline explicit operator hipFloatComplex() const {
return make_hipFloatComplex(real, imag);
}
HOSTDEVICE inline explicit operator hipDoubleComplex() const {
return make_hipDoubleComplex(real, imag);
}
#else
HOSTDEVICE inline explicit operator cuFloatComplex() const {
return make_cuFloatComplex(real, imag);
}
HOSTDEVICE inline explicit operator cuDoubleComplex() const {
return make_cuDoubleComplex(real, imag);
}
#endif
#endif
template <typename T1,
typename std::enable_if<std::is_floating_point<T1>::value ||
std::is_integral<T1>::value,
int>::type = 0>
HOSTDEVICE complex(const T1& val) {
real = static_cast<T>(val);
imag = static_cast<T>(0.0);
}
template <typename T1 = T>
HOSTDEVICE explicit complex(
const std::enable_if_t<std::is_same<T1, float>::value, complex<double>>&
val) {
real = val.real;
imag = val.imag;
}
template <typename T1 = T>
HOSTDEVICE explicit complex(
const std::enable_if_t<std::is_same<T1, double>::value, complex<float>>&
val) {
real = val.real;
imag = val.imag;
}
template <typename T1>
HOSTDEVICE inline explicit operator std::complex<T1>() const {
return static_cast<std::complex<T1>>(std::complex<T>(real, imag));
}
template <typename T1>
HOSTDEVICE complex(const std::complex<T1>& val)
: real(val.real()), imag(val.imag()) {}
template <typename T1,
typename std::enable_if<std::is_floating_point<T1>::value ||
std::is_integral<T1>::value,
int>::type = 0>
HOSTDEVICE inline complex& operator=(const T1& val) {
real = static_cast<T>(val);
imag = static_cast<T>(0.0);
return *this;
}
HOSTDEVICE inline explicit operator bool() const {
return static_cast<bool>(this->real) || static_cast<bool>(this->imag);
}
HOSTDEVICE inline explicit operator int8_t() const {
return static_cast<int8_t>(this->real);
}
HOSTDEVICE inline explicit operator uint8_t() const {
return static_cast<uint8_t>(this->real);
}
HOSTDEVICE inline explicit operator int16_t() const {
return static_cast<int16_t>(this->real);
}
HOSTDEVICE inline explicit operator uint16_t() const {
return static_cast<uint16_t>(this->real);
}
HOSTDEVICE inline explicit operator int32_t() const {
return static_cast<int32_t>(this->real);
}
HOSTDEVICE inline explicit operator uint32_t() const {
return static_cast<uint32_t>(this->real);
}
HOSTDEVICE inline explicit operator int64_t() const {
return static_cast<int64_t>(this->real);
}
HOSTDEVICE inline explicit operator uint64_t() const {
return static_cast<uint64_t>(this->real);
}
HOSTDEVICE inline explicit operator float() const {
return static_cast<float>(this->real);
}
HOSTDEVICE inline explicit operator double() const {
return static_cast<double>(this->real);
}
};
template <typename T>
HOSTDEVICE inline complex<T> operator+(const complex<T>& a,
const complex<T>& b) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return complex<T>(thrust::complex<T>(a) + thrust::complex<T>(b));
#else
return complex<T>(a.real + b.real, a.imag + b.imag);
#endif
}
template <typename T>
HOSTDEVICE inline complex<T> operator-(const complex<T>& a,
const complex<T>& b) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return complex<T>(thrust::complex<T>(a) - thrust::complex<T>(b));
#else
return complex<T>(a.real - b.real, a.imag - b.imag);
#endif
}
template <typename T>
HOSTDEVICE inline complex<T> operator*(const complex<T>& a,
const complex<T>& b) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return complex<T>(thrust::complex<T>(a) * thrust::complex<T>(b));
#else
return complex<T>(a.real * b.real - a.imag * b.imag,
a.imag * b.real + b.imag * a.real);
#endif
}
template <typename T>
HOSTDEVICE inline complex<T> operator/(const complex<T>& a,
const complex<T>& b) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return complex<T>(thrust::complex<T>(a) / thrust::complex<T>(b));
#else
T denominator = b.real * b.real + b.imag * b.imag;
return complex<T>((a.real * b.real + a.imag * b.imag) / denominator,
(a.imag * b.real - a.real * b.imag) / denominator);
#endif
}
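// For reference, the scalar fallback above implements the usual identity
//   a / b = a * conj(b) / |b|^2
//         = ((ar*br + ai*bi) + i*(ai*br - ar*bi)) / (br*br + bi*bi),
// which is exactly what the two component expressions compute.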
template <typename T>
HOSTDEVICE inline complex<T> operator-(const complex<T>& a) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return complex<T>(-thrust::complex<T>(a.real, a.imag));
#else
complex<T> res;
res.real = -a.real;
res.imag = -a.imag;
return res;
#endif
}
template <typename T>
HOSTDEVICE inline complex<T>& operator+=(complex<T>& a, // NOLINT
const complex<T>& b) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
a = complex<T>(thrust::complex<T>(a.real, a.imag) +=
thrust::complex<T>(b.real, b.imag));
return a;
#else
a.real += b.real;
a.imag += b.imag;
return a;
#endif
}
template <typename T>
HOSTDEVICE inline complex<T>& operator-=(complex<T>& a, // NOLINT
const complex<T>& b) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
a = complex<T>(thrust::complex<T>(a.real, a.imag) -=
thrust::complex<T>(b.real, b.imag));
return a;
#else
a.real -= b.real;
a.imag -= b.imag;
return a;
#endif
}
template <typename T>
HOSTDEVICE inline complex<T>& operator*=(complex<T>& a, // NOLINT
const complex<T>& b) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
a = complex<T>(thrust::complex<T>(a.real, a.imag) *=
thrust::complex<T>(b.real, b.imag));
return a;
#else
  // Compute into temporaries: updating a.real in place would feed the new
  // value into the a.imag expression below.
  T res_real = a.real * b.real - a.imag * b.imag;
  T res_imag = a.imag * b.real + b.imag * a.real;
  a.real = res_real;
  a.imag = res_imag;
  return a;
#endif
}
template <typename T>
HOSTDEVICE inline complex<T>& operator/=(complex<T>& a, // NOLINT
const complex<T>& b) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
a = complex<T>(thrust::complex<T>(a.real, a.imag) /=
thrust::complex<T>(b.real, b.imag));
return a;
#else
  T denominator = b.real * b.real + b.imag * b.imag;
  // Use temporaries for the same aliasing reason as operator*= above.
  T res_real = (a.real * b.real + a.imag * b.imag) / denominator;
  T res_imag = (a.imag * b.real - a.real * b.imag) / denominator;
  a.real = res_real;
  a.imag = res_imag;
  return a;
#endif
}
template <typename T>
HOSTDEVICE inline complex<T> raw_uint16_to_complex64(uint16_t a) {
complex<T> res;
res.real = a;
res.imag = 0.0;
return res;
}
template <typename T>
HOSTDEVICE inline bool operator==(const complex<T>& a, const complex<T>& b) {
return a.real == b.real && a.imag == b.imag;
}
template <typename T>
HOSTDEVICE inline bool operator!=(const complex<T>& a, const complex<T>& b) {
return a.real != b.real || a.imag != b.imag;
}
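// Note: the relational operators below (and the min/max helpers further
// down) order complex values by their real part only.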
template <typename T>
HOSTDEVICE inline bool operator<(const complex<T>& a, const complex<T>& b) {
return a.real < b.real;
}
template <typename T>
HOSTDEVICE inline bool operator<=(const complex<T>& a, const complex<T>& b) {
return a.real <= b.real;
}
template <typename T>
HOSTDEVICE inline bool operator>(const complex<T>& a, const complex<T>& b) {
return a.real > b.real;
}
template <typename T>
HOSTDEVICE inline bool operator>=(const complex<T>& a, const complex<T>& b) {
return a.real >= b.real;
}
template <typename T>
HOSTDEVICE inline complex<T>(max)(const complex<T>& a, const complex<T>& b) {
return (a.real >= b.real) ? a : b;
}
template <typename T>
HOSTDEVICE inline complex<T>(min)(const complex<T>& a, const complex<T>& b) {
return (a.real < b.real) ? a : b;
}
template <typename T>
HOSTDEVICE inline bool(isnan)(const complex<T>& a) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return ::isnan(a.real) || ::isnan(a.imag);
#else
return std::isnan(a.real) || std::isnan(a.imag);
#endif
}
template <typename T>
HOSTDEVICE inline bool isinf(const complex<T>& a) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return ::isinf(a.real) || ::isinf(a.imag);
#else
return std::isinf(a.real) || std::isinf(a.imag);
#endif
}
template <typename T>
HOSTDEVICE inline bool isfinite(const complex<T>& a) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
  // A complex value is finite only when both components are finite.
  return ::isfinite(a.real) && ::isfinite(a.imag);
#else
  return std::isfinite(a.real) && std::isfinite(a.imag);
#endif
}
template <typename T>
HOSTDEVICE inline T abs(const complex<T>& a) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return thrust::abs(thrust::complex<T>(a));
#else
return std::abs(std::complex<T>(a));
#endif
}
template <typename T>
HOSTDEVICE inline T arg(const complex<T>& a) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return thrust::arg(thrust::complex<T>(a));
#else
return std::arg(std::complex<T>(a));
#endif
}
template <typename T>
HOSTDEVICE inline complex<T> pow(const complex<T>& a, const complex<T>& b) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return complex<T>(thrust::pow(thrust::complex<T>(a), thrust::complex<T>(b)));
#else
return complex<T>(std::pow(std::complex<T>(a), std::complex<T>(b)));
#endif
}
template <typename T>
HOSTDEVICE inline complex<T> sqrt(const complex<T>& a) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return complex<T>(thrust::sqrt(thrust::complex<T>(a)));
#else
return complex<T>(std::sqrt(std::complex<T>(a)));
#endif
}
template <typename T>
HOSTDEVICE inline complex<T> tanh(const complex<T>& a) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return complex<T>(thrust::tanh(thrust::complex<T>(a)));
#else
return complex<T>(std::tanh(std::complex<T>(a)));
#endif
}
template <typename T>
HOSTDEVICE inline complex<T> log(const complex<T>& a) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return complex<T>(thrust::log(thrust::complex<T>(a)));
#else
return complex<T>(std::log(std::complex<T>(a)));
#endif
}
template <typename T>
inline std::ostream& operator<<(std::ostream& os, const complex<T>& a) {
os << "real:" << a.real << " imag:" << a.imag;
return os;
}
} // namespace dtype
} // namespace pten
namespace std {
template <typename T>
struct is_pod<pten::dtype::complex<T>> {
static const bool value = true;
};
template <typename T>
struct is_floating_point<pten::dtype::complex<T>>
: std::integral_constant<bool, false> {};
template <typename T>
struct is_signed<pten::dtype::complex<T>> {
static const bool value = false;
};
template <typename T>
struct is_unsigned<pten::dtype::complex<T>> {
static const bool value = false;
};
template <typename T>
inline bool isnan(const pten::dtype::complex<T>& a) {
return pten::dtype::isnan(a);
}
template <typename T>
inline bool isinf(const pten::dtype::complex<T>& a) {
return pten::dtype::isinf(a);
}
template <typename T>
struct numeric_limits<pten::dtype::complex<T>> {
static const bool is_specialized = false;
static const bool is_signed = false;
static const bool is_integer = false;
static const bool is_exact = false;
static const bool has_infinity = false;
static const bool has_quiet_NaN = false;
static const bool has_signaling_NaN = false;
static const float_denorm_style has_denorm = denorm_absent;
static const bool has_denorm_loss = false;
static const std::float_round_style round_style = std::round_toward_zero;
static const bool is_iec559 = false;
static const bool is_bounded = false;
static const bool is_modulo = false;
static const int digits = 0;
static const int digits10 = 0;
static const int max_digits10 = 0;
static const int radix = 0;
static const int min_exponent = 0;
static const int min_exponent10 = 0;
static const int max_exponent = 0;
static const int max_exponent10 = 0;
static const bool traps = false;
static const bool tinyness_before = false;
static pten::dtype::complex<T>(min)() {
return pten::dtype::complex<T>(0.0, 0.0);
}
static pten::dtype::complex<T> lowest() {
return pten::dtype::complex<T>(0.0, 0.0);
}
static pten::dtype::complex<T>(max)() {
return pten::dtype::complex<T>(0.0, 0.0);
}
static pten::dtype::complex<T> epsilon() {
return pten::dtype::complex<T>(0.0, 0.0);
}
static pten::dtype::complex<T> round_error() {
return pten::dtype::complex<T>(0.0, 0.0);
}
static pten::dtype::complex<T> infinity() {
return pten::dtype::complex<T>(0.0, 0.0);
}
static pten::dtype::complex<T> quiet_NaN() {
return pten::dtype::complex<T>(0.0, 0.0);
}
static pten::dtype::complex<T> signaling_NaN() {
return pten::dtype::complex<T>(0.0, 0.0);
}
static pten::dtype::complex<T> denorm_min() {
return pten::dtype::complex<T>(0.0, 0.0);
}
};
} // namespace std
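Taken together, the struct, its operators, and the std-namespace specializations above let pten::dtype::complex stand in for a scalar type in generic host code. The sketch below is illustrative only: the include path is an assumption based on the install rules in this patch, and the main() harness is not part of the source.

#include <complex>
#include <iostream>

#include "paddle/pten/common/complex.h"  // assumed install path

int main() {
  using c64 = pten::dtype::complex<float>;

  c64 a(3.0f);          // scalar ctor: imaginary part defaults to 0
  c64 b(1.0f, -2.0f);
  c64 c = a * b + b;    // free operators defined above

  // Round trip through std::complex via the explicit conversion operator.
  std::complex<double> sc = static_cast<std::complex<double>>(c);
  c64 back(sc);

  // The std-namespace helpers dispatch to pten::dtype::isnan / isinf.
  if (!std::isnan(back) && !std::isinf(back)) {
    std::cout << back << std::endl;  // prints "real:... imag:..."
  }
  return 0;
}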
......@@ -14,19 +14,19 @@ limitations under the License. */
#pragma once
#include "bfloat16.h" // NOLINT
#include "complex.h" // NOLINT
#include "float16.h" // NOLINT
#include "paddle/pten/common/bfloat16.h"
#include "paddle/pten/common/complex.h"
#include "paddle/pten/common/float16.h"
#include "paddle/pten/api/ext/exception.h"
namespace paddle {
namespace experimental {
using complex64 = ::paddle::platform::complex<float>;
using complex128 = ::paddle::platform::complex<double>;
using float16 = ::paddle::platform::float16;
using bfloat16 = ::paddle::platform::bfloat16;
using complex64 = ::pten::dtype::complex<float>;
using complex128 = ::pten::dtype::complex<double>;
using float16 = ::pten::dtype::float16;
using bfloat16 = ::pten::dtype::bfloat16;
enum class DataType {
UNDEFINED = 0,
......
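Because the public aliases now target pten::dtype, code that spells out paddle::experimental::complex64 keeps compiling unchanged. A compile-time check of that equivalence (the header path here is an assumption, not taken from the patch):

#include <type_traits>

#include "paddle/pten/common/data_type.h"  // assumed location of the aliases above

static_assert(std::is_same<paddle::experimental::complex64,
                           ::pten::dtype::complex<float>>::value,
              "complex64 now resolves to the pten type");
static_assert(std::is_same<paddle::experimental::float16,
                           ::pten::dtype::float16>::value,
              "float16 now resolves to the pten type");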
......@@ -15,9 +15,9 @@ limitations under the License. */
#include "paddle/pten/core/dense_tensor.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/pten/common/bfloat16.h"
#include "paddle/pten/common/complex.h"
#include "paddle/pten/common/float16.h"
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/core/convert_utils.h"
......
......@@ -14,7 +14,7 @@ limitations under the License. */
#pragma once
#include "paddle/fluid/platform/complex.h"
#include "paddle/pten/common/complex.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/infermeta/unary.h"
#include "paddle/pten/kernels/empty_kernel.h"
......
......@@ -19,7 +19,7 @@
#include "paddle/pten/core/kernel_registry.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/complex.h"
#include "paddle/pten/common/complex.h"
PT_REGISTER_KERNEL(conj,
CPU,
......
......@@ -18,7 +18,7 @@
#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/pten/common/complex.h"
PT_REGISTER_KERNEL(dot_grad,
CPU,
......
......@@ -18,7 +18,7 @@
#include "paddle/pten/core/kernel_registry.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/complex.h"
#include "paddle/pten/common/complex.h"
namespace pten {
......
......@@ -25,8 +25,8 @@
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/pten/common/bfloat16.h"
#include "paddle/pten/common/complex.h"
namespace pten {
......
......@@ -14,7 +14,7 @@ limitations under the License. */
#include "paddle/pten/kernels/matmul_grad_kernel.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/pten/common/complex.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/impl/matmul_grad_kernel_impl.h"
......
......@@ -17,7 +17,7 @@ limitations under the License. */
#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/pten/common/complex.h"
#include "paddle/pten/kernels/impl/matmul_kernel_impl.h"
PT_REGISTER_KERNEL(matmul,
......
......@@ -22,7 +22,7 @@ limitations under the License. */
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/pten/common/bfloat16.h"
namespace pten {
template <typename T, typename Context>
......
......@@ -19,6 +19,6 @@ limitations under the License. */
#include "paddle/pten/core/kernel_registry.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/pten/common/bfloat16.h"
PT_REGISTER_KERNEL(sign, CPU, ALL_LAYOUT, pten::SignKernel, float, double) {}
......@@ -16,7 +16,7 @@
#include "paddle/pten/backends/all_context.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/pten/common/complex.h"
namespace pten {
......
......@@ -15,8 +15,8 @@ limitations under the License. */
#pragma once
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/hostdevice.h"
#include "paddle/pten/common/float16.h"
namespace pten {
namespace funcs {
......
......@@ -18,9 +18,9 @@
#include "paddle/pten/core/dense_tensor.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/pten/common/bfloat16.h"
#include "paddle/pten/common/complex.h"
#include "paddle/pten/common/float16.h"
namespace pten {
namespace math {
......
......@@ -19,9 +19,9 @@
#include "paddle/pten/kernels/funcs/transpose.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/pten/common/bfloat16.h"
#include "paddle/pten/common/complex.h"
#include "paddle/pten/common/float16.h"
namespace pten {
......
......@@ -21,10 +21,10 @@
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/aligned_vector.h"
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/device/gpu/gpu_helper.h"
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/pten/common/bfloat16.h"
#include "paddle/pten/common/float16.h"
namespace pten {
......
......@@ -19,7 +19,7 @@
#include "paddle/pten/core/kernel_registry.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/complex.h"
#include "paddle/pten/common/complex.h"
PT_REGISTER_KERNEL(conj,
GPU,
......
......@@ -18,7 +18,7 @@ limitations under the License. */
#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/pten/common/complex.h"
PT_REGISTER_KERNEL(dot_grad,
GPU,
......
......@@ -20,7 +20,7 @@
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/pten/common/complex.h"
namespace pten {
......
......@@ -27,9 +27,9 @@ limitations under the License. */
namespace cub = hipcub;
#endif
#include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/pten/common/complex.h"
#include "paddle/pten/common/float16.h"
#include "paddle/pten/core/convert_utils.h"
#include "paddle/pten/core/kernel_registry.h"
......
......@@ -14,7 +14,7 @@ limitations under the License. */
#include "paddle/pten/kernels/matmul_grad_kernel.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/pten/common/complex.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/impl/matmul_grad_kernel_impl.h"
......
......@@ -17,7 +17,7 @@ limitations under the License. */
#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/pten/common/complex.h"
#include "paddle/pten/kernels/impl/matmul_kernel_impl.h"
PT_REGISTER_KERNEL(matmul,
......
......@@ -18,7 +18,7 @@ limitations under the License. */
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/funcs/elementwise_base.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/float16.h"
#include "paddle/pten/common/float16.h"
namespace pten {
......
......@@ -19,7 +19,7 @@ limitations under the License. */
#include "paddle/pten/core/kernel_registry.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/float16.h"
#include "paddle/pten/common/float16.h"
using float16 = paddle::platform::float16;
......
......@@ -569,9 +569,6 @@ headers = (
list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/pten/common')) + # pten common headers
# For paddle's new custom op, only copy data type headers from `paddle/fluid/platform`
# to `paddle/pten/api/ext`,
['@PADDLE_SOURCE_DIR@/paddle/fluid/platform/bfloat16.h'] +
['@PADDLE_SOURCE_DIR@/paddle/fluid/platform/complex.h'] +
['@PADDLE_SOURCE_DIR@/paddle/fluid/platform/float16.h'] +
['@PADDLE_SOURCE_DIR@/paddle/utils/any.h'])
if '${WITH_MKLDNN}' == 'ON':
......